def start_indexing(dirs_list, dirs_dicts, main_path, posting_path, to_stem,
                   start_index, end_index, directory):
    dirs_dicts[directory] = None
    reader = ReadFile()
    parser = Parse(main_path)
    indexer = Indexer(posting_path + directory)
    if to_stem:
        parser.to_stem = True
        indexer.to_stem = True
    if not os.path.exists(posting_path + directory):
        os.makedirs(posting_path + directory)
    documents = {}
    i = start_index
    while i < end_index:
        docs = reader.separate_docs_in_file(main_path + '\\corpus', dirs_list[i])
        j = 0
        for doc_id in docs:
            doc_dict = parser.main_parser(docs[doc_id].text, docs[doc_id])
            docs[doc_id].text = None
            if i == end_index - 1 and j == len(docs) - 1:
                indexer.finished_parse = True
            indexer.index_terms(doc_dict, doc_id)
            documents[doc_id] = docs[doc_id]
            j += 1
        i += 1
    dirs_dicts[directory] = [indexer.post_files_lines, indexer.terms_dict,
                             documents, reader.languages]
def __init__(self, dirName, stopWordsDict=None):
    self.dirName = dirName
    self.wordsDict = {}
    self.stopWordsDict = stopWordsDict
    self.totalWords = 0
    fileList = os.listdir(dirName)
    self.count = len(fileList)
    for fileName in fileList:
        rdFileObj = ReadFile(dirName + '/' + fileName)
        wordsList = rdFileObj.getWords()
        self._populateWordsDict(wordsList)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--dir", help="Fetch files from given directory & execute")
    parser.add_argument("-t", "--template", help="Provide template file for the document")
    parser.add_argument("-s", "--subject", help="Provide subject name override in document")
    parser.add_argument("-n", "--name", help="Name for the output doc file")
    parser.add_argument("-pn", "--pno", help="Practical no.")
    args = parser.parse_args()

    if not args.dir:
        print("directory should be specified")
        sys.exit(0)
    if not args.template:
        args.template = "template.docx"
    if not args.pno:
        args.pno = 1
    if not args.name:
        args.name = "AnilSoni-" + str(datetime.now().strftime("%d-%m-%y %H-%M-%S")) + ".docx"
    # print(args.dir, args.template, args.name)

    files = GetFiles(args.dir)
    path, list_files = files.get_file()
    path += "\\"
    # print("{}, {}".format(path, list_files))

    # Execute all files present in the directory
    execute_program = ExecuteProgram()
    doc = CreateDocument(template=args.template, subject=args.subject,
                         practicalno=args.pno)
    read_file_source = ReadFile()
    pic_name = ""
    for pythonfile in list_files:
        # print("Executing {}".format(pythonfile))
        execute_program.execute(path=args.dir, filename=pythonfile)
        # execute_program.wait()
        sleep(5)
        pic = screenshot()
        pic_name = pythonfile[0:len(pythonfile) - 3] + ".png"
        pic.save(pic_name)
        # print("screenshot saved with name {}".format(pic_name))
        execute_program.terminate()
        source = read_file_source.read_file(filename=args.dir + pythonfile)
        # print(args.dir + pythonfile)
        source = "".join(source)
        print(execute_program.get_aim())
        doc.set_aim_source_output(aim=execute_program.get_aim(),
                                  source=source, output=pic_name)
        # print("terminating cmd")
        sleep(2)
    # print("Terminating the process")
    doc.save_doc(args.name)
def main():
    p1 = ReadFile('./point_cloud_registration/pointcloud1.fuse')
    p2 = ReadFile('./point_cloud_registration/pointcloud2.fuse')
    icp = ICP(p1, p2)
    icp.setThres(0.1)
    icp.setTol(0.01)
    t = icp.iterativeClosestPoint()
    print('Rotation matrix')
    print(t[0])
    print('Translation vector')
    print(t[1])
    print('Iteration no.')
    print(t[2])
class Semantics:
    def __init__(self):
        self.reader = ReadFile()
        self.path = 'D:\\Studies\\BGU\\semesterE\\IR\\IRProj\\SEproject\\corpus'
        self.sentences = []

    def read_corpus(self):
        i = 0
        dirs_list = os.listdir(self.path)
        while i < 3:  # len(dirs_list)
            docs = self.reader.separate_docs_in_file(self.path, dirs_list[i])
            for doc_id in docs:
                terms = gensim.utils.simple_preprocess(docs[doc_id].text)
                self.sentences.append(terms)
            i += 1
            docs.clear()

    def start(self):
        # Train the model:
        # model = Word2Vec(self.sentences, min_count=2)
        # model = gensim.models.Word2Vec(documents, size=150, window=10, min_count=2, workers=10)
        # model.train(self.sentences, total_examples=len(self.sentences), epochs=10)
        # Save the model:
        # model.save('model.bin')
        # Load the model
        new_model = Word2Vec.load('model.bin')
        # Access the vectors most similar to one word
        w1 = 'intelligence'
        similar = new_model.wv.most_similar(positive=w1)
        print(similar)
def __init__(self, file_name, folds=5, splits=5, smote=True):
    super(Learner, self).__init__()
    self.file_name = file_name
    self.folds = folds
    self.splits = splits
    self.smote_val = smote
    if not file_name:
        raise Exception("Filename is required.")
    seed(0)
    self.data = Data()
    self.result = Result()
    self.predict = None
    ReadFile(file_name, self.data)
def main():
    print('Start reading links ...')
    readlink = ReadFile('Partition6467LinkData.csv')
    mp = readlink.readLinks()
    print('Links reading complete ...')
    # print('Unit ready ...')
    print('Start reading probe points ...')
    readprobe = ReadFile('Partition6467ProbePoints.csv')
    if len(sys.argv) > 1:
        print('Start reading %d probes ...' % int(sys.argv[1]))
        prbs = readprobe.readProbes(int(sys.argv[1]))
    else:
        print('Start reading %d probes ...' % 5000)
        prbs = readprobe.readProbes()
    sampleIds = prbs.keys()
    # print(prbs.items())
    mtsq = list()
    print('Probes reading complete ...')
    for sid in sampleIds:
        print('Analyzing probes from sample No.%d ...' % sid)
        # Match.__init__(self, plist, mp)
        mc = Match(prbs[sid], mp)
        print('\tStart calculating candidate points ...')
        pcps = mc.probeCandidates()
        print('\tCandidate points calculating complete ...')
        print('\tStart matching sequence ...')
        mtsq.extend(mc.findMatchedSequence(pcps))
        print('\tSequence matching complete ...')
    print('Start writing matched sequence ...')
    wf = WriteFile()
    wf.writeMP(mtsq)
    print('Sequence writing complete ...')
    print('Start calculating slopes ...')
    slope = Slope(mtsq, mp)
    slp = slope.calSlope()
    print('Slopes calculating complete ...')
    print('Start writing slopes ...')
    wf.writeS(slp)
    print('Slopes writing complete ...')
def __init__(self, file_path, target_location=None, regression=None,
             dates=None, days=None, isCars=False, ignore=None):
    # Set class variables
    self.target_location = target_location
    self.file_path = file_path
    self.regression = regression
    self.date_index = dates
    self.days_index = days
    self.data = ReadFile.read(self, file_path)
    # Remove the specified ignored columns
    if ignore is not None:
        self.data = self.removeColumns(ignore)
    # Cars is one of the only categorical sets where we really need to map a lot
    # of data, so if it's cars we run it against our set of conversions
    if isCars:
        for i in range(0, len(self.data)):
            for j in range(0, len(self.data[0])):
                value = self.data[i][j]
                if value in self.low_high_values:
                    self.data[i][j] = self.low_high_values[value]
    # If it has dates, map them to the date values above
    if dates is not None:
        for i in range(0, len(self.data)):
            self.data[i][dates] = self.date_values[self.data[i][dates]]
    if days is not None:
        for i in range(0, len(self.data)):
            self.data[i][days] = self.day_values[self.data[i][days]]
    # If we are regressing, ensure all values are floats
    if regression:
        for i in range(0, len(self.data)):
            for j in range(0, len(self.data[0])):
                self.data[i][j] = float(self.data[i][j])
    # If not regressing and we have a target class, normalize the non-class data
    # (note: this scales by the global min/max of the whole matrix, not per column)
    if target_location is not None and not regression:
        raw_data = self.separateClassFromData()
        np_array = numpy.array(raw_data)
        np_array = (np_array - np_array.min()) / (np_array.max() - np_array.min())
        self.data = self.joinClassAndData(np_array)
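# A minimal standalone sketch (not part of the class above): the constructor
# scales by the global min/max of the whole matrix, while the more common
# variant scales each feature column independently. The helper name here is
# ours, purely for illustration.
import numpy

def min_max_scale_columns(rows):
    a = numpy.array(rows, dtype=float)
    col_min = a.min(axis=0)
    col_span = a.max(axis=0) - col_min
    col_span[col_span == 0] = 1.0  # avoid division by zero on constant columns
    return (a - col_min) / col_span

# e.g. min_max_scale_columns([[1, 10], [2, 30], [3, 20]]) maps each column
# to [0, 1] independently.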
def mapping():
    """Read and allow the creation of the sub-pathway."""
    form: InfoForm = InfoForm()
    pathway: List = list()
    gen_file = None
    if request.method == "POST":
        if form.validate_on_submit():
            email: str = request.form["email"]
            option = request.form['options']
            gene: str = request.form['gene']
            basepairs: int = int(request.form['basepairs'])
            accession_number: str = request.form['accession_number']
            ident: int = int(request.form['ident'])
            hit_size: int = int(request.form['hit_size'])
            expect: int = int(request.form['expect'])
            try:
                gen_file = request.files['upload']
            except KeyError:
                pass
            if gen_file and allowed_file(gen_file.filename):
                filename: str = secure_filename(gen_file.filename)
                gen_file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
                readFile = ReadFile(os.path.join(UPLOAD_FOLDER, filename))
                pathway = readFile.get_gene(option, gene, basepairs, ident / 100)
            else:
                filename = search.searchGenbank(accession_number)
                if not filename:
                    return "Not found"
                readFile = ReadFile(filename)
                pathway = readFile.get_gene(option, gene, basepairs, ident / 100,
                                            hit_size, expect)
            if pathway is not False:
                job_number = uuid.uuid4()
                insert_job_number(job_number)
                insert_job_data(job_number, pathway)
                # send_email(email, job_number)
                return redirect(url_for("diagram", job_number=job_number))
            else:
                return "Not found"
        else:
            return render_template('map.html', form=form)
    return render_template('map.html', form=form)
    for pos in snake:
        pygame.draw.rect(screen, snake_color,
                         (pos[1] * map_length / map_len + 1,
                          pos[0] * map_length / map_len + 1,
                          map_length / map_len, map_length / map_len), 0)
    pygame.display.update()


def draw_food(map_len, screen, food_pos, map_length):
    food_color = (0, 166, 244)
    pygame.draw.rect(screen, food_color,
                     (food_pos[1] * map_length / map_len + 1,
                      food_pos[0] * map_length / map_len + 1,
                      map_length / map_len, map_length / map_len))


if len(sys.argv) > 0:
    # x = ReadFile(sys.argv[1], "map-out.txt")
    x = ReadFile("test.txt", "map-out.txt")
    t = x.read_file()
    food_fen = FoodGenerator()
    for m in range(0, 11):
        init_state = GameMap(t, None, [0, 2], [[0, 2], [0, 1], [0, 0]], True)
        init_state_diagonal = GameMap(t, None, [0, 2], [[0, 2], [0, 1], [0, 0]], False)
        init_state.set_food_position(food_fen.generate_food(init_state.game_map))
        init_state_diagonal.set_food_position(food_fen.generate_food(init_state.game_map))
        movement_list_manhattan = []
        movement_list_diagonal = []
        for i in range(0, 30):
            # print("Score=", i)
            mySearch = AStarSearch(init_state)
filePath = options.directory
outputDir = options.outputDir
variable = options.para
# global xaxis
# xaxis = options.xaxis

# if directory[len(directory)-1] != '/':
#     directory = directory + '/'
# print("Scan:", directory)

# raw
global g_dicData
g_dicInputData = {}
readFile = ReadFile()
statComp = StatisticComp()
# g_dicInputData = readFile.readDire(filePath)
g_dicInputData = readFile.readSDire(filePath)
# g_dicInputData = readFile.readDireTimeseries(filePath)
print(g_dicInputData)
# if DEBUG:
#     pprint.pprint(g_dicInputData)
# print(len(g_dicInputData['5']))
# collectData(directory)
# sys.exit()

global g_dicMeanData
def setErrors(data):
    # Flip one random bit in every complete 7-bit block: at most one error
    # per code block, which the Hamming decoder below can correct
    length = len(data)
    i, j = 0, 7
    while j <= length:
        randIndex = randint(i, j - 1)
        temp = data[randIndex]
        temp = '1' if int(temp) == 0 else '0'
        data = data[:randIndex] + temp + data[randIndex + 1:]
        i = j
        j += 7
    return data


file = ReadFile('txt_files/Text.txt', analize=True)
source = Huffman(file.data)
encripted = source.encript(file.file_text)
channel = Hamming()
encripted = channel.encode(encripted)

# Set errors
errorData = setErrors(encripted)
writeToFile('Encripted Text', errorData)

# Fix errors
channel = Hamming()
trueData = channel.decode(errorData)
decripted = source.decript(trueData)
writeToFile('Decripted Text', decripted)
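# Why one flipped bit per 7-bit block is always recoverable: a minimal,
# self-contained Hamming(7,4) syndrome check, independent of the Hamming
# class above (whose internals are assumed, not shown). The syndrome of the
# three parity checks is 0 for a clean block and otherwise equals the
# position of the flipped bit.
def hamming74_syndrome(block):
    b = [None] + [int(c) for c in block]  # 1-based bit positions 1..7
    s1 = b[1] ^ b[3] ^ b[5] ^ b[7]        # parity over positions 1,3,5,7
    s2 = b[2] ^ b[3] ^ b[6] ^ b[7]        # parity over positions 2,3,6,7
    s4 = b[4] ^ b[5] ^ b[6] ^ b[7]        # parity over positions 4,5,6,7
    return s4 * 4 + s2 * 2 + s1           # 0 = clean, otherwise error position

codeword = '0110011'                      # a valid (7,4) codeword
corrupted = '0100011'                     # bit 3 flipped, as setErrors might do
pos = hamming74_syndrome(corrupted)
fixed = corrupted[:pos - 1] + str(1 - int(corrupted[pos - 1])) + corrupted[pos:]
assert pos == 3 and fixed == codeword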
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import numpy
import struct
import dtw
from ReadFile import ReadFile
from record import recoder
from python_speech_features import mfcc
import scipy.io.wavfile as wav

f1 = ReadFile("mfcc/mfc/1.mfc")
f2 = ReadFile("mfcc/mfc/2.mfc")
f3 = ReadFile("mfcc/mfc/3.mfc")
f4 = ReadFile("mfcc/mfc/4.mfc")
f5 = ReadFile("mfcc/mfc/5.mfc")
f6 = ReadFile("mfcc/mfc/6.mfc")
f7 = ReadFile("mfcc/mfc/7.mfc")
f8 = ReadFile("mfcc/mfc/8.mfc")
f9 = ReadFile("mfcc/mfc/9.mfc")
f10 = ReadFile("mfcc/mfc/10.mfc")

N1 = f1.getN()
N2 = f2.getN()
N3 = f3.getN()
N4 = f4.getN()
N5 = f5.getN()
N6 = f6.getN()
N7 = f7.getN()
N8 = f8.getN()
N9 = f9.getN()
N10 = f10.getN()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/6/12 20:36
# @Author  : nkuhjp
# @Site    :
# @File    : PageRank.py
# @Software: PyCharm Community Edition
import time
from ReadFile import ReadFile
from BlockStripePR import BlockStripePR

if __name__ == '__main__':
    # Read the file and split it into blocks
    start_time = time.time()
    readFile = ReadFile()
    readFile.get_statistics()
    filename = input('Enter the file to read (default: WikiData.txt): ')
    if not filename:
        filename = 'WikiData.txt'  # apply the advertised default on empty input
    block_num = input('Enter the number of blocks (default: 15): ')
    if not block_num.isdigit() or int(block_num) <= 0:
        block_num = 15
    else:
        block_num = int(block_num)
    readFile.read_file(filename, block_num)
    print('Reading and blocking the file took', time.time() - start_time, 'seconds')

    # Compute the scores from the blocks built above
    start_time = time.time()
    blockStripePR = BlockStripePR(readFile.get_nodeId(), readFile.get_nodeNumber(),
                                  readFile.get_nodeList(),
# --------------------------------------------------------------------------------------------------------------------*
# Author: AratioD @2016
# MULTIPLES OF X AND Y
# Data structure: one main program and three class files.
#   Main class:           MultiplesXY.py
#   Data class file 1/3:  ReadFile.py   Class: ReadFile
#   Data class file 2/3:  Calculate.py  Class: Calculate
#   Data class file 3/3:  WriteFile.py  Class: WriteFile
# --------------------------------------------------------------------------------------------------------------------*

# IMPORT all three classes
from ReadFile import ReadFile
from Calculate import Calculate
from WriteFile import WriteFile

readfile = ReadFile()
calculate = Calculate()
writefile = WriteFile()

# MAIN PROGRAM ---------------------------------------------------------------------------------------------------------*
# Read the input data file name from the keyboard and return the file name
inputDatafileName = readfile.readInputFile()

# Count the number of rows in the file
rowAmount = readfile.howManyLinesAreInTheInputFile(inputDatafileName)
# GitHub project link: https://github.com/wasymshykh/python-code-analyser
from os import path
from ReadFile import ReadFile
from Calculate import Calculate

if __name__ == '__main__':
    # File name and path
    file_name = 'test.java'
    file_path = path.join(path.dirname(path.abspath(__file__)), file_name)
    # Opening the file
    r = ReadFile(file_path)
    # Source code analysis
    c = Calculate(r.get_lines_list_clean())
    c.print_data()
def _test(res=''):
    start_time = time.time()
    filepath = '../data/' + res + '.csv'
    seed(1)
    np.random.seed(1)
    read = ReadFile(filepath)
    data, labels = read.build_table()
    # Normalize preprocessing:
    # data = normalize(np.array(zip(*data)), norm='l2')
    # data = np.array(zip(*data))
    split = split_two(np.array(data), np.array(labels))
    pos = np.array(split['pos'])
    neg = np.array(split['neg'])
    # 20% train and test
    final = {}
    result = {}
    cut_pos, cut_neg = cut_position(pos, neg, percentage=80)
    for learner in learners:
        start_time1 = time.time()
        l = []
        x = {}
        measures = ["Recall", "Precision", "Accuracy", "F_score",
                    "False_alarm", "AUC"]
        for q in measures:
            x[q] = []
        for folds in range(15):
            # list() so the index arrays can be shuffled in place (Python 3)
            pos_shuffle = list(range(0, len(pos)))
            neg_shuffle = list(range(0, len(neg)))
            shuffle(pos_shuffle)
            shuffle(neg_shuffle)
            pos = pos[pos_shuffle]
            neg = neg[neg_shuffle]
            data_train, train_label, data_test, test_label = divide_train_test(
                pos, neg, cut_pos, cut_neg)
            de = DE(F=0.7, CR=0.3, x='rand')
            global max_fitness
            max_fitness = 0
            pop = [[choice(bounds[0]), choice(bounds[1]), choice(bounds[2]),
                    uniform(bounds[3][0], bounds[3][1])] for _ in range(10)]
            v, score, final_para_dic = de.solve(main, pop, iterations=3, file=res,
                                                data_samples=data_train,
                                                target=train_label,
                                                learner=learner)
            model = Learner(samples=np.vstack((data_train, data_test)),
                            labels=train_label + test_label, smote=True, v=v,
                            percentage=80)
            model.run(learners=[learner])
            x["Accuracy"].append(model.get_accuracy()[0][1])
            x["F_score"].append(model.get_f_score()[0][1])
            x["Precision"].append(model.get_precision()[0][1])
            x["Recall"].append(model.get_recall()[0][1])
            x["False_alarm"].append(model.get_false_alarm()[0][1])
            x["AUC"].append(model.get_auc()[0][1])
            l.append([v, score, final_para_dic])
        print(x)
        result[learner] = [x, l, time.time() - start_time1]
    final[res] = result
    print(final)
    with open('../dump/auc/' + res + '_auc1.pickle', 'wb') as handle:
        pickle.dump(final, handle)
# -*- coding: utf-8 -*-
__author__ = 'oupeng'

from ReadFile import ReadFile
from ThreadOperation import ThreadOperation

if __name__ == '__main__':
    rf = ReadFile("newUtilityRecords.txt")
    if rf.validatePath():
        eList = rf.parseRow()
        NumOfThread = len(eList)
        # for one in eList:
        #     executer(one)
        threads = []
        for i in range(0, NumOfThread):
            # keep the thread object itself; start() returns None,
            # so appending its result would break the join() loop below
            t = ThreadOperation(eList[i])
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
    answer = input("please input y to exit")
    while answer != 'y':
        answer = input("please input y to exit")
from Join_Ftr import *
from ReadFile import ReadFile
from GS_Face import GsFace

feature_pth = 'E:\\GPforFR\\data\\lfw_feature5'
instruc_pth_t = 'E:\\GPforFR\\data\\lfw_view1\\pairsDevTrain.txt'
instruc_pth_s = 'E:\\GPforFR\\data\\lfw_view1\\pairsDevTest.txt'
num = 5

read_file = ReadFile(instruc_pth_t, num)
X1 = read_file.person_pair() + read_file.person_mispair()
read_file = ReadFile(instruc_pth_s, num)
X2 = read_file.person_pair() + read_file.person_mispair()

gs_feature = Join_Ftr()
Xtar, Ytar = gs_feature.Constrct_XY(feature_pth, X1)
Xsrc, Ysrc = gs_feature.Constrct_XY(feature_pth, X2)
Xt_in, Yt_in = gs_feature.XY_in(Xtar, Ytar)
Xs_in, Ys_in = gs_feature.XY_in(Xsrc, Ysrc)
gsface = GsFace(Xt_in, Xs_in)
class Indexer:
    name_of_files = None   # The names of the initial posting files
    file_type = None       # The file type of the posting files
    parser = None          # The parser of the project
    read_file = None       # The ReadFile of the class
    main_dictionary = None
    temp_position_dic = None

    # The constructor of the class
    def __init__(self, corpus_file_path):
        self.name_of_files = {}
        # Initializing the names of the files: one queue per two-letter
        # ('aa'..'zz') and two-digit ('00'..'99') bucket, plus 'other'
        for i in range(0, 26):
            for j in range(0, 26):
                self.name_of_files["%s%s" % (chr(ord('a') + i), chr(ord('a') + j))] = Queue.Queue()
        for i in range(0, 10):
            for j in range(0, 10):
                self.name_of_files["%d%d" % (i, j)] = Queue.Queue()
        self.name_of_files['other'] = Queue.Queue()
        # The file type is txt
        self.file_type = 'txt'
        # Creating the initial posting files
        self.create_initial_posting_file()
        # Initializing the Parser
        self.parser = Parser()
        # Initializing the ReadFile
        self.read_file = ReadFile(corpus_file_path)
        # Initializing the dictionaries
        self.main_dictionary = {}
        self.temp_position_dic = {}

    # def add_to_dicts(self):
    #     numOfDoc, text = self.read_file.getFile()
    #     counter = 0
    #     num_of_docs = 1000
    #     parser_threads = []
    #     add_to_dictionary_threads = []
    #     text_to_thread = []
    #     while text != "all docs are received" and counter != 1000:
    #         counter += 1
    #         print(counter)
    #         if counter % num_of_docs == 0:
    #             thread = ParserThread(self.queue_of_parser, self.parser, text_to_thread)
    #             text_to_thread = []
    #             parser_threads.append(thread)
    #             thread.start()
    #             thread = AddToDictionaryThread(self.queue_of_parser, num_of_docs,
    #                                            self.read_file, self.main_dictionary)
    #             add_to_dictionary_threads.append(thread)
    #             thread.start()
    #         text = self.find_sub("TEXT", text)
    #         text_to_thread.append((text, counter))
    #         # dictionary_of_words, dictionary_of_unique_terms, max_freq = self.parser.parse_to_unique_terms(text)
    #         # self.add_to_main_dictionary_spacial(dictionary_of_unique_terms)
    #         # for key in dictionary_of_words:
    #         #     self.add_term_to_dictionary(key, dictionary_of_words[key])
    #         # self.read_file.add_to_max_values_dict(max_freq, numOfDoc)
    #         numOfDoc, text = self.read_file.getFile()
    #     if len(text_to_thread) != 0:
    #         thread = ParserThread(self.queue_of_parser, self.parser, text_to_thread)
    #         parser_threads.append(thread)
    #         thread.start()
    #         thread = AddToDictionaryThread(self.queue_of_parser, len(text_to_thread),
    #                                        self.read_file, self.main_dictionary)
    #         add_to_dictionary_threads.append(thread)
    #         thread.start()
    #     print("wait for parsing")
    #     for i in range(0, len(parser_threads)):
    #         parser_threads[i].join()
    #     print("wait for dic")
    #     for i in range(0, len(add_to_dictionary_threads)):
    #         add_to_dictionary_threads[i].join()
    #     print(self.main_dictionary)

    def add_to_dicts2(self):
        numOfDoc, text = self.read_file.getFile()
        coun = 0
        num = 0
        # compare by value ('!='), not identity ('is not')
        while text != "all docs are received" and coun != 1000:
            coun += 1
            print(coun)
            text = self.find_sub("TEXT", text)
            dictionary_of_words, dictionary_of_unique_terms, max_freq = \
                self.parser.parse_to_unique_terms(text)
            self.add_to_main_dictionary_spacial(dictionary_of_unique_terms, numOfDoc)
            for key in dictionary_of_words:
                # grab the tf before the key may be re-cased by the dictionary
                tf = dictionary_of_words[key]
                key = self.add_term_to_dictionary(key)
                self.add_term_to_queue(key, numOfDoc, tf)
            self.read_file.add_to_max_values_dict(max_freq, numOfDoc)
            numOfDoc, text = self.read_file.getFile()

    def index_files(self):
        # pass the method itself as the thread target; calling it here would
        # run it immediately on the main thread instead
        thread_add_to_dic = threading.Thread(target=self.add_to_dicts2)
        threads = []
        for name in self.name_of_files:
            thread = Consumer(self.name_of_files[name], name, self.file_type,
                              self.main_dictionary, 1, self.temp_position_dic)
            threads.append(thread)
            thread.start()
        thread_add_to_dic.start()
        thread_add_to_dic.join()
        for i in range(0, len(threads)):
            threads[i].stop_thread()
        for i in range(0, len(threads)):
            threads[i].join()

    def add_to_main_dictionary_spacial(self, terms_dict, doc_id):
        for key in terms_dict:
            if key in self.main_dictionary:
                self.main_dictionary[key][0] = self.main_dictionary[key][0] + 1
            else:
                self.main_dictionary[key] = [1, -1]
                self.temp_position_dic[key] = [-1, -1]
            self.add_term_to_queue(key, doc_id, self.main_dictionary[key][0])

    def add_term_to_queue(self, term, doc_id, tf):
        note = term[0].lower()
        note2 = note
        flag1 = 'a' <= note <= 'z'
        flag2 = '0' <= note <= '9'
        if flag1 or flag2:
            if len(term) > 1:
                low = term[1].lower()
                if (('a' <= low <= 'z') and flag1) or (('0' <= low <= '9') and flag2):
                    note2 = term[1].lower()
            self.name_of_files["%s%s" % (note, note2)].put((term, doc_id, tf))
            return
        self.name_of_files['other'].put((term, doc_id, tf))

    # This function will create the initial posting files
    def create_initial_posting_file(self):
        # Go through every file name and create it
        for name in self.name_of_files:
            file = open("%s.%s" % (name, self.file_type), "w")
            file.close()
            # os.remove(file.name)  # just for now

    # This function will add a term to the dictionary
    def add_term_to_dictionary(self, term):
        # If the term is already in the dictionary
        if term in self.main_dictionary:
            self.main_dictionary[term][0] = self.main_dictionary[term][0] + 1
            return term
        # If the term starts with a capital letter
        if 'A' <= term[0] <= 'Z':
            lower = term.lower()
            # If the term is already in the dictionary in lower case
            if lower in self.main_dictionary:
                self.main_dictionary[lower][0] = self.main_dictionary[lower][0] + 1
                return lower
        elif 'a' <= term[0] <= 'z':
            upper = term.upper()
            # If the term is already in the dictionary in upper case,
            # move its counts over to the lower-case spelling
            if upper in self.main_dictionary:
                self.main_dictionary[term] = [self.main_dictionary[upper][0] + 1, -1]
                self.temp_position_dic[term] = [self.temp_position_dic[upper][0],
                                                self.temp_position_dic[upper][1]]
                del self.temp_position_dic[upper]
                del self.main_dictionary[upper]
                return term
        # The term is new in the dictionary
        self.main_dictionary[term] = [1, -1]
        self.temp_position_dic[term] = [-1, -1]
        return term

    # This function will return the string between two tags
    def find_sub(self, tag, string):
        start = "<" + tag + ">"
        end1 = "</" + tag + ">"
        string = string[string.find(start) + len(start):string.find(end1)]
        return string
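# A standalone sketch of the bucket routing that add_term_to_queue implements:
# terms map to one of the 'aa'..'zz' / '00'..'99' posting-file queues by their
# first two characters, or to 'other'. The function name here is ours, purely
# for illustration of the data structure.
def posting_bucket(term):
    c1 = term[0].lower()
    is_alpha = 'a' <= c1 <= 'z'
    is_digit = '0' <= c1 <= '9'
    if not (is_alpha or is_digit):
        return 'other'
    c2 = c1  # single-character (or mixed-class) terms repeat the first character
    if len(term) > 1:
        low = term[1].lower()
        if (is_alpha and 'a' <= low <= 'z') or (is_digit and '0' <= low <= '9'):
            c2 = low
    return c1 + c2

assert posting_bucket('Apple') == 'ap'
assert posting_bucket('42nd') == '42'
assert posting_bucket('a') == 'aa'
assert posting_bucket('$100') == 'other'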
class Main:
    """
    Class Description:
    Implements the Controller of the MVC model; runs the project.
    """

    """
    Description:
    This method initializes Main's properties
    """

    def __init__(self):
        self.main_path = ''
        self.posting_path = ''
        self.to_stem = False
        self.indexer = None
        self.reader = ReadFile()
        self.languages = set()
        self.searcher = None
        self.queries_docs_results = []
        self.avg_doc_length = 0
        self.with_semantics = False
        self.save_path = ''

    """
    Description:
    This method manages the program
    """

    def start(self):
        self.indexer = Indexer(self.posting_path)
        if self.to_stem:
            self.indexer.to_stem = True
        dirs_list = os.listdir(self.main_path + '\\corpus')
        # Create temp postings (multiprocessing)
        dirs_dict = ParallelMain.start(self.main_path, self.posting_path,
                                       self.to_stem, dirs_list)
        # Merge the dictionaries that were created by the processes
        docs = {}
        files_names = []
        post_files_lines = []
        total_length = 0
        for dir in dirs_dict.keys():
            tmp_docs_dict = dirs_dict[dir][2]
            for doc_id in tmp_docs_dict:
                docs[doc_id] = tmp_docs_dict[doc_id]
                total_length += docs[doc_id].length
            for lang in dirs_dict[dir][3]:
                self.languages.add(lang)
            old_post_files_lines = dirs_dict[dir][0]
            for i in range(0, len(old_post_files_lines)):
                files_names.append(dir + "\\Posting" + str(i) if not self.to_stem
                                   else dir + "\\sPosting" + str(i))
                post_files_lines.append(old_post_files_lines[i])
        self.avg_doc_length = total_length / len(docs)
        # Get the cities that appear in the corpus
        i = 0
        while i < len(dirs_list):
            self.reader.read_cities(self.main_path + '\\corpus', dirs_list[i])
            i += 1
        terms_dicts = [dirs_dict["\\Postings1"][1], dirs_dict["\\Postings2"][1],
                       dirs_dict["\\Postings3"][1], dirs_dict["\\Postings4"][1]]
        terms_dict = Merge.start_merge(files_names, post_files_lines, terms_dicts,
                                       self.posting_path, self.to_stem)
        self.indexer.docs_avg_length = self.avg_doc_length
        self.indexer.terms_dict = terms_dict
        self.indexer.docs_dict = docs
        self.indexer.index_cities(self.reader.cities)
        self.indexer.post_pointers(self.languages)
        # self.searcher = Searcher(self.main_path, self.posting_path,
        #                          self.indexer.terms_dict, self.indexer.cities_dict,
        #                          self.indexer.docs_dict, self.avg_doc_length,
        #                          self.to_stem, self.with_semantics)
        # self.searcher.model = Word2Vec.load('model.bin')
        # path = self.posting_path + '\FinalPost' + '\Final_Post'
        # linecache.getline(path, 500000)

    """
    Description:
    This method calls the Indexer function for loading saved files into the
    program's main memory
    """

    def load(self):
        self.indexer = Indexer(self.posting_path)
        if self.to_stem:
            self.indexer.to_stem = True
        self.languages = self.indexer.load()
        self.avg_doc_length = self.indexer.docs_avg_length
        self.searcher = Searcher(self.main_path, self.posting_path,
                                 self.indexer.terms_dict, self.indexer.cities_dict,
                                 self.indexer.docs_dict, self.avg_doc_length,
                                 self.to_stem, self.with_semantics)
        self.searcher.model = Word2Vec.load(self.posting_path + '//model.bin')

    """
    Description:
    This method erases all of the files in the posting path
    """

    def reset(self):
        shutil.rmtree(self.posting_path)
        if not os.path.exists(self.posting_path):
            os.makedirs(self.posting_path)
        self.indexer = None

    """
    Description:
    This method returns the terms dictionary, used by the GUI IndexView for
    showing the dictionary.
    """

    def get_terms_dict(self):
        return self.indexer.terms_dict

    """
    Description:
    This method returns the languages of the corpus, used by the GUI IndexView
    for showing the languages.
""" def get_languages(self): # should return string with languages separated with '\n' return self.languages """ Description : This method gets the corpus path from the GUI """ def set_corpus_path(self, path): self.main_path = path """ Description : This method gets the posting path from the GUI """ def set_posting_path(self, path): self.posting_path = path """ Description : This method gets the stemming bool from the GUI """ def set_stemming_bool(self, to_stem): self.to_stem = to_stem def set_with_semantics(self, with_semantics): self.with_semantics = with_semantics self.searcher.with_semantics = with_semantics def report(self): num_count = 0 i = 0 freq = {} for term in self.indexer.terms_dict.keys(): if Parse.isFloat(term): num_count += 1 freq[term] = self.indexer.terms_dict[term][1] freq_list = sorted(freq.items(), key=itemgetter(1)) with open('frequency.txt', 'wb') as f: for n in freq_list: f.write(str(n[0]) + ": " + str(n[1]) + '\n') print "Num of terms which are nums: " + str(num_count) print "Num of countries: " + str(len(self.indexer.countries)) print "Num of capitals: " + str(self.indexer.num_of_capitals) def set_save_path(self, dir_path): self.save_path = dir_path def save(self): file_name = '' if self.to_stem: file_name += 's' if self.with_semantics: file_name += 's' file_name = '\\' + file_name + 'results.txt' with open(self.save_path + file_name, 'a+') as f: for query_result in self.queries_docs_results: for doc in query_result[2]: line = " {} 0 {} 1 42.38 {}\n".format( query_result[0], doc[0], 'rg') f.write(line) def get_cities_list(self): if self.indexer is None: return None return self.indexer.cities_dict.keys() def start_query_search(self, query, chosen_cities): return self.searcher.search(query, chosen_cities) def start_file_search(self, queries_path_entry, chosen_cities): queries_list = [] current_queries_results = [] with open(queries_path_entry, 'rb') as f: lines = f.readlines() id = 0 i = 0 query = '' narr = '' while i < len(lines): if '<num>' in lines[i]: id = lines[i].split(':')[1].replace('\n', '') elif '<title>' in lines[i]: query = lines[i].replace('<title>', '').replace('\n', '') elif '<desc>' in lines[i]: i += 1 while not '<narr>' in lines[i]: query = '{} {}'.format( query, lines[i].replace('<title>', '').replace('\n', '')) i += 1 queries_list.append((id, query)) i += 1 for query_tuple in queries_list: docs_result = self.start_query_search(query_tuple[1], chosen_cities) tmp = (query_tuple[0], query_tuple[1], docs_result) current_queries_results.append(tmp) self.queries_docs_results.append(tmp) return self.queries_docs_results def get_doc_five_entities(self, doc_id): return self.searcher.docs_dict[doc_id].five_entities
from Controller import Controller
from CreateQuestions import CreateQuestions
from ReadFile import ReadFile
from View import View

c = Controller()
v = View(c)
v.start()

readfile = ReadFile()
createQuestions = CreateQuestions()

file = open("./text.txt", 'r')
file_text = file.read()
file.close()

file_text = readfile.parse(file_text)
questions = createQuestions.create_questions(file_text)
for q in questions:
    print(q)
    for answer in questions.get(q):
        print(answer)
def file_system(self):
    while True:
        # Features.feature_list(self)
        choice = input("\nChoose Your Option: ")
        if choice == '1':
            folder_name = input("\nEnter The Folder Name: ")
            folder_address = os.path.join(os.getcwd(), folder_name)
            CreateFolder.folder(self, folder_address)
            folder_location[folder_name] = folder_address
        elif choice == '2':
            file_name = input("\nEnter The File Name: ")
            file_address = os.path.join(os.getcwd(), file_name)
            CreateFile.create_file(self, file_address)
            file_location[file_name] = file_address
        elif choice == '3':
            while True:
                print("Where You Want To Move?")
                print("(1) File To Folder")
                print("(2) Folder To Folder")
                move = input("\nEnter Your Choice: ")
                if move == '1':
                    source = input("\nEnter The Source File Name: ")
                    source_address = file_location[source]
                    destination = input("\nEnter The Destination Folder Name: ")
                    destination_address = folder_location[destination]
                    AddDirectory.add(self, source_address, destination_address)
                    file_location[source] = os.path.join(destination_address, source)
                    break
                elif move == '2':
                    source = input("\nEnter The Source Folder Name: ")
                    source_address = folder_location[source]
                    destination = input("Enter The Destination Folder Name: ")
                    destination_address = folder_location[destination]
                    AddDirectory.add(self, source_address, destination_address)
                    folder_location[source] = os.path.join(destination_address, source)
                    break
                else:
                    print("Invalid Input, Type 1 or 2")
        elif choice == '4':
            while True:
                print("(1) Write in a New File")
                print("(2) Write in an Existing File")
                write_choice = input("\nChoose Your Option: ")
                if write_choice == '1':
                    file = input("Enter The File Name: ")
                    file_address = os.path.join(os.getcwd(), file)
                    content = input("\nEnter The Content You Want To Write: ")
                    WriteFile.write(self, file_address, content)
                    file_location[file] = file_address
                    break
                elif write_choice == '2':
                    file = input("\nEnter The File Name: ")
                    file_address = file_location[file]
                    content = input("\nEnter The Content You Want To Write: ")
                    WriteFile.write(self, file_address, content)
                    break
                else:
                    print("Invalid Input, Type 1 or 2")
        elif choice == '5':
            file = input("\nEnter The File Name: ")
            file_address = file_location[file]
            ReadFile.read(self, file_address)
        elif choice == '6':
            file = input("\nEnter File Name: ")
            file_address = file_location[file]
            ClearFile.clear(self, file_address)
        elif choice == '7':
            file = input("\nEnter File Name: ")
            file_address = file_location[file]
            ContentLength.get_length(self, file_address)
        elif choice == '8':
            folder = input("\nEnter Folder Name: ")
            folder_address = folder_location[folder]
            FolderSize.folder_size(self, folder_address)
        elif choice == '9':
            required_folder = input("\nEnter Folder Name: ")
            FolderFiles.file_list(self, folder_location[required_folder])
        elif choice == '10':
            folder = input("\nEnter Folder Name: ")
            WalkTree.tree(self, folder_location[folder])
        elif choice == '11':
            print(os.getcwd())
        elif choice == '12':
            FolderFiles.file_list(self, os.getcwd())
        elif choice == '13':
            print("\nGood Bye. See you Soon")
            exit()
        else:
            print("\nInvalid Choice")
filePath = options.directory
outputDir = options.outputDir
global xaxis
xaxis = options.xaxis

# if directory[len(directory)-1] != '/':
#     directory = directory + '/'
# print("Scan:", directory)

# raw
global g_dicData
g_dicInputData = {}
readFile = ReadFile()
g_dicInputData = readFile.collectData(filePath)
if DEBUG:
    pprint.pprint(g_dicInputData)
# collectData(directory)

global g_dicMeanData
g_dicMeanData = {}
meanCompute(g_dicInputData, g_dicMeanData)

global g_cdfData
g_cdfData = []
import WordOperations
from ReadFile import ReadFile

if __name__ == '__main__':
    print("Welcome To Hangman!")
    print("> The Computer Will Guess Your word using your hints.")
    # Read file contents into a list
    ReadFile()

    def getCount():
        count = input("Hint 1 - What is The Word count? (Must be between 4-7!): ")
        # If the number is not within range, ask again; the recursive result
        # must be returned, otherwise the invalid count would be used
        if int(count) > 7 or int(count) < 4:
            print("Number must be within the range 4-7")
            return getCount()
        return count

    WordOperations.RemoveExtraByRange(getCount())
    WordOperations.RemoveExtraByFLetter(
        input("Hint 2 - Whats the First Letter of the word?: "))
    WordOperations.RemoveExtraByLLetter(
        input("Hint 3 - Whats the Last Letter of the word?: "))
    WordOperations.RemoveExtraBy2L()
    WordOperations.RemoveExtraBy3L()
from ReadFile import ReadFile
import pprint

pp = pprint.PrettyPrinter(indent=2)

# Class instance
read = ReadFile()

# Method that reads a file and returns a list
pib_txt = read.list_file('pib.txt')
regioes_txt = read.list_file('regioes.txt')

# ----------------------------------------------------------------------------
# Method that computes the total GDP (PIB) per region
soma_regioes = read.sum_regioes(regioes_txt, pib_txt)
teste = read.somatoria(soma_regioes)
print('GDP total per region')
pp.pprint(teste)
print('------------------------')

# ----------------------------------------------------------------------------
# Method that computes the GDP percentage per state
print('GDP percentage per state')
pib_states = read.pib_states(pib_txt)
pp.pprint(pib_states)
    i += 1
    return r[200:]


# c = 1
# while c < 21:
#     u = str("rafliA" + str(c))
#     print("Processing... ", u)
#     a, b = ReadFile(u)
#     # print(len(a))
#     a = signalProcessing(a, 15, 20, 500)
#     b = signalProcessing(b, 15, 20, 500)
#     plotData2(a, b, 15, str(str(u) + "Filter"), "")
#     c = c + 1

a, b = ReadFile("/ilham/data/ilham20keC1")
print("Processing..")
ap = int(len(a) / 15)
bp = int(len(b) / 15)
a = FunctionOnlyone.bandpass_firwin_filter(a, 1200, 20, 500, len(a) / 15)
b = FunctionOnlyone.bandpass_firwin_filter(b, 1200, 20, 500, len(b) / 15)
a = FunctionOnlyone.fastFourierTransform(a, 15, name="FFTilhamke20C1A", fs=len(a) / 15)
b = FunctionOnlyone.fastFourierTransform(b, 15, name="FFTilhamke20C1B", fs=len(b) / 15)
# a = FunctionOnlyone.welchFunction(a, 15)
# b = FunctionOnlyone.welchFunction(a, 15)
from ReadFile import ReadFile
import preprocess
import dbn_model
import utils

root = utils.get_root_path(False)
read = ReadFile(root + "/NSL_KDD-master").get_data()
data_pp = preprocess.Preprocess(read).do_predict_preprocess()
dbn_model.DBN(data_pp).do_dbn(action='yadlt')
do_dbn = dbn_model.DBN(data_pp).do_dbn_with_weight_matrix(root + "/save")
print("[end]test_dbn")