def _main(): """ Main function. Prints the AST of a file. """ config = Config() filesname = get_opts(sys.argv[1:], config) files = [] if len(filesname) == 1 and os.path.isfile(filesname[0]): for line in ast(config, '\n'.join(open(filesname[0]).readlines())): print line return for filename in filesname: if os.path.isdir(filename): for root, dirs, d_files in os.walk(filename): for d_file in d_files: files.append(root + os.sep + d_file) else: files.append(filename) files = filter(lambda item: item.endswith('.py'), files) docs = read_docs_from_list(config, files) y = docs.values()[0] conn = similarity.main(config['sim'], [x for x in docs.values() if x]) print "Com erro de sintax: %s. Sem erro de sintax: %s." % (len([x for x in docs.values() if not x]), (len([x for x in docs.values() if x]))) linhaString = [] linhaString.append("") for i in range(len(docs.values())): linhaString.append("e" + str(i)) #print linhaString with open('test.csv', 'wb') as csvfile: spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) for d1, d2, value in conn: #print d1.name, d2.name, value spamwriter.writerow([d1.name, d2.name, value]) linhaAnterior = "" listaColunas = [] auxLista = [] with open('test2.csv', 'wb') as csvfile: spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) spamwriter.writerow(linhaString) for d1, d2, value in conn: #print d1.name, d2.name, value auxNomeQuestao = splitNomeQuesta(d1.name) #spamwriter.writerow([test]) if (not auxLista.__contains__(auxNomeQuestao)): auxLista.append(auxNomeQuestao) listaColunas.append({auxNomeQuestao: [value]}) elif (auxLista.__contains__(auxNomeQuestao)): auxKey = getObjeto(auxLista, auxNomeQuestao) for item in listaColunas: if (item.has_key(auxKey)): item.get(auxKey).append(value) for i in listaColunas: spamwriter.writerow([i, len(i.values()[0])]) spamwriter.writerow([])
def search_most_similar_movie(standard_title):
    """Return a row of similarity scores between *standard_title* and the
    movies listed in ``mLst``.

    The first element of the returned list is the title itself; each
    following element is ``1 / distance`` to another movie, or 0 when the
    movie is compared with itself.
    """
    resList = [standard_title]
    # DB folder: one "*_append3.txt" file per movie in the "txt" directory.
    dirName = "txt"
    file_list = glob.glob(os.path.join(dirName, "*_append3.txt"))
    # Each txt file in the folder.
    for inFile in file_list:
        # Extract the movie title: drop the extension, then the
        # 8-character "_append3" suffix.
        filename = os.path.basename(inFile).split(".")[0]
        filename = filename[:-8]
        # Renamed loop variable from 'str', which shadowed the builtin.
        for title in mLst:
            if filename == title:
                if filename == standard_title:
                    # Same file: no self-comparison, score 0.
                    resList.append(0)
                else:
                    dist = similarity.main(standard_title, filename)
                    print(standard_title, "--", filename, dist)
                    # NOTE(review): raises ZeroDivisionError if dist == 0 --
                    # confirm similarity.main never returns 0 here.
                    resList.append(1 / dist)
    return resList
def _main(): """ Main function. Prints the AST of a file. """ config = Config() filesname = get_opts(sys.argv[1:], config) files = [] if len(filesname) == 1 and os.path.isfile(filesname[0]): for line in ast(config, '\n'.join(open(filesname[0]).readlines())): print line return for filename in filesname: if os.path.isdir(filename): for root, dirs, d_files in os.walk(filename): for d_file in d_files: files.append(root + os.sep + d_file) else: files.append(filename) files = filter(lambda item: item.endswith('.py'), files) docs = read_docs_from_list(config, files) conn = similarity.main(config['sim'], [x for x in docs.values() if x]) for d1, d2, value in conn: print d1.name, d2.name, value
def _main(): """ Main function. Prints the AST of a file. """ config = Config() filesname = get_opts(sys.argv[1:], config) files = [] if len(filesname) == 1 and os.path.isfile(filesname[0]): for line in ast(config, '\n'.join(open(filesname[0]).readlines())): print line return for filename in filesname: if os.path.isdir(filename): for root, dirs, d_files in os.walk(filename): for d_file in d_files: files.append(root + os.sep + d_file) else: files.append(filename) files = filter(lambda item: item.endswith('.py'), files) docs = read_docs_from_list(config, files) y = docs.values()[0] conn = similarity.main(config['sim'], [x for x in docs.values() if x]) print "Com erro de sintax: %s. Sem erro de sintax: %s." % ( len([x for x in docs.values() if not x]), (len([x for x in docs.values() if x]))) linhaString = [] linhaString.append("") for i in range(len(docs.values())): linhaString.append("e" + str(i)) #print linhaString with open('test.csv', 'wb') as csvfile: spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) for d1, d2, value in conn: #print d1.name, d2.name, value spamwriter.writerow([d1.name, d2.name, value]) linhaAnterior = "" listaColunas = [] auxLista = [] with open('test2.csv', 'wb') as csvfile: spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) spamwriter.writerow(linhaString) for d1, d2, value in conn: #print d1.name, d2.name, value auxNomeQuestao = splitNomeQuesta(d1.name) #spamwriter.writerow([test]) if (not auxLista.__contains__(auxNomeQuestao)): auxLista.append(auxNomeQuestao) listaColunas.append({auxNomeQuestao: [value]}) elif (auxLista.__contains__(auxNomeQuestao)): auxKey = getObjeto(auxLista, auxNomeQuestao) for item in listaColunas: if (item.has_key(auxKey)): item.get(auxKey).append(value) for i in listaColunas: spamwriter.writerow([i, len(i.values()[0])]) spamwriter.writerow([])
def exec_function(self, args):
    """Build a ``similarity.py`` command line from *args* and run it.

    Handles two sub-commands:
      - ``simtrain``: prepares training data, trains, writes default
        tokenization configs, then pushes the model directory to storage.
      - ``simapply``: pulls the model locally, scores a src/tgt test file
        pair and pushes the output file to storage.

    NOTE(review): indentation was reconstructed from a collapsed source
    line -- confirm block nesting against the original file.
    """
    new_args = []
    local_output = None  # local temp file for the output (when not stdout)
    train_data = None  # dict of prepared training artifacts (simtrain only)
    local_model_dir = None
    if args.cmd == "simtrain":
        # Fresh scratch directory; pushed to args.mdir after training.
        local_model_dir = tempfile.mkdtemp()
    if args.cmd == "simapply":
        # Fetch the remote model directory to a local path.
        local_model_dir = self.convert_to_local_file([args.mdir], is_dir=True)[0]
    new_args.extend(
        [
            "-mdir",
            local_model_dir,
            "-batch_size",
            str(args.batch_size),
            "-seed",
            str(args.seed),
        ]
    )
    if args.debug:
        new_args.append("-debug")
    if args.cmd == "simtrain":
        setCUDA_VISIBLE_DEVICES(args.gpuid)
        train_data = self.prepare_train_data(args)
        # TODO: if user provides tokenization config file
        # '-src_tok', self.convert_to_local_file([args.src_tok])[0],
        # '-tgt_tok', self.convert_to_local_file([args.tgt_tok])[0],
        new_args.extend(
            [
                "-trn",
                train_data["train"],
                "-dev",
                train_data["dev"],
                "-src_voc",
                train_data["src_voc"],
                "-tgt_voc",
                train_data["tgt_voc"],
                "-src_emb_size",
                str(args.src_emb_size),
                "-tgt_emb_size",
                str(args.tgt_emb_size),
                "-src_lstm_size",
                str(args.src_lstm_size),
                "-tgt_lstm_size",
                str(args.tgt_lstm_size),
                "-lr",
                str(args.lr),
                "-lr_decay",
                str(args.lr_decay),
                "-lr_method",
                args.lr_method,
                "-aggr",
                args.aggr,
                "-r",
                str(args.r),
                "-dropout",
                str(args.dropout),
                "-mode",
                args.mode,
                "-max_sents",
                str(args.max_sents),
                "-n_epochs",
                str(args.n_epochs),
                "-report_every",
                str(args.report_every),
            ]
        )
        # Optional pre-trained embeddings, fetched to local files first.
        if args.src_emb:
            new_args.extend(
                ["-src_emb", self.convert_to_local_file([args.src_emb])[0]]
            )
        if args.tgt_emb:
            new_args.extend(
                ["-tgt_emb", self.convert_to_local_file([args.tgt_emb])[0]]
            )
    if args.cmd == "simapply":
        local_src_file = self.convert_to_local_file([args.tst_src])[0]
        local_tgt_file = self.convert_to_local_file([args.tst_tgt])[0]
        new_args.append("-tst")
        new_args.append(local_src_file + "," + local_tgt_file)
        if args.epoch:
            new_args.append("-epoch")
            new_args.append(str(args.epoch))
    new_args.append("-output")
    if args.output == "-":
        # "-" means stdout: pass through unchanged.
        new_args.append(args.output)
    else:
        # Write locally first; pushed to the remote location at the end.
        local_output = tempfile.NamedTemporaryFile(delete=False)
        new_args.append(local_output.name)
    if args.q:
        new_args.append("-q")
    if args.show_matrix:
        new_args.append("-show_matrix")
    if args.show_svg:
        new_args.append("-show_svg")
    if args.show_align:
        new_args.append("-show_align")
    if args.show_last:
        new_args.append("-show_last")
    if args.show_aggr:
        new_args.append("-show_aggr")
    logger.info("command line option: %s" % " ".join(new_args))
    main(["similarity.py"] + new_args)
    if args.cmd == "simtrain":
        # Ship default tokenization configs plus the SentencePiece model
        # with the trained model, then clean up local training artifacts.
        default_sp_model_name = "joint_spm_50k.model"
        generate_default_tok_config(
            os.path.join(local_model_dir, "tokenization_src.json"),
            default_sp_model_name,
            source=True,
        )
        generate_default_tok_config(
            os.path.join(local_model_dir, "tokenization_tgt.json"),
            default_sp_model_name,
            source=False,
        )
        os.rename(
            train_data["sp_model"],
            os.path.join(local_model_dir, default_sp_model_name),
        )
        os.remove(train_data["train"])
        os.remove(train_data["dev"])
        os.remove(train_data["src_voc"])
        os.remove(train_data["tgt_voc"])
        self._storage.push(local_model_dir, args.mdir)
        shutil.rmtree(local_model_dir)
    if args.cmd == "simapply" and local_output is not None:
        self._storage.push(local_output.name, args.output)
import similarity
import wordsense

# Prompt for a whitespace-separated list of words and run the word-sense
# and similarity lookups on each one.
user_input = input("Enter one or more words (separated by a space): ")
for token in user_input.split():
    wordsense.main(token)
    similarity.main(token)
def exec_function(self, args):
    """Build a ``similarity.py`` command line from *args* and run it.

    Handles two sub-commands:
      - ``simtrain``: prepares training data, trains, writes default
        tokenization configs, then pushes the model directory to storage.
      - ``simapply``: pulls the model locally, scores a src/tgt test file
        pair and pushes the output file to storage.

    NOTE(review): indentation was reconstructed from a collapsed source
    line -- confirm block nesting against the original file.
    """
    new_args = []
    local_output = None  # local temp file for the output (when not stdout)
    train_data = None  # dict of prepared training artifacts (simtrain only)
    local_model_dir = None
    if args.cmd == 'simtrain':
        # Fresh scratch directory; pushed to args.mdir after training.
        local_model_dir = tempfile.mkdtemp()
    if args.cmd == 'simapply':
        # Fetch the remote model directory to a local path.
        local_model_dir = self.convert_to_local_file([args.mdir], is_dir=True)[0]
    new_args.extend([
        '-mdir', local_model_dir,
        '-batch_size', str(args.batch_size),
        '-seed', str(args.seed)
    ])
    if args.debug:
        new_args.append('-debug')
    if args.cmd == 'simtrain':
        setCUDA_VISIBLE_DEVICES(args.gpuid)
        train_data = self.prepare_train_data(args)
        # TODO: if user provides tokenization config file
        # '-src_tok', self.convert_to_local_file([args.src_tok])[0],
        # '-tgt_tok', self.convert_to_local_file([args.tgt_tok])[0],
        new_args.extend([
            '-trn', train_data['train'],
            '-dev', train_data['dev'],
            '-src_voc', train_data['src_voc'],
            '-tgt_voc', train_data['tgt_voc'],
            '-src_emb_size', str(args.src_emb_size),
            '-tgt_emb_size', str(args.tgt_emb_size),
            '-src_lstm_size', str(args.src_lstm_size),
            '-tgt_lstm_size', str(args.tgt_lstm_size),
            '-lr', str(args.lr),
            '-lr_decay', str(args.lr_decay),
            '-lr_method', args.lr_method,
            '-aggr', args.aggr,
            '-r', str(args.r),
            '-dropout', str(args.dropout),
            '-mode', args.mode,
            '-max_sents', str(args.max_sents),
            '-n_epochs', str(args.n_epochs),
            '-report_every', str(args.report_every)
        ])
        # Optional pre-trained embeddings, fetched to local files first.
        if args.src_emb:
            new_args.extend(['-src_emb', self.convert_to_local_file([args.src_emb])[0]])
        if args.tgt_emb:
            new_args.extend(['-tgt_emb', self.convert_to_local_file([args.tgt_emb])[0]])
    if args.cmd == 'simapply':
        local_src_file = self.convert_to_local_file([args.tst_src])[0]
        local_tgt_file = self.convert_to_local_file([args.tst_tgt])[0]
        new_args.append('-tst')
        new_args.append(local_src_file + ',' + local_tgt_file)
        if args.epoch:
            new_args.append('-epoch')
            new_args.append(str(args.epoch))
    new_args.append('-output')
    if args.output == '-':
        # "-" means stdout: pass through unchanged.
        new_args.append(args.output)
    else:
        # Write locally first; pushed to the remote location at the end.
        local_output = tempfile.NamedTemporaryFile(delete=False)
        new_args.append(local_output.name)
    if args.q:
        new_args.append('-q')
    if args.show_matrix:
        new_args.append('-show_matrix')
    if args.show_svg:
        new_args.append('-show_svg')
    if args.show_align:
        new_args.append('-show_align')
    if args.show_last:
        new_args.append('-show_last')
    if args.show_aggr:
        new_args.append('-show_aggr')
    logger.info("command line option: %s" % " ".join(new_args))
    main(['similarity.py'] + new_args)
    if args.cmd == 'simtrain':
        # Ship default tokenization configs plus the SentencePiece model
        # with the trained model, then clean up local training artifacts.
        default_sp_model_name = 'joint_spm_50k.model'
        generate_default_tok_config(os.path.join(local_model_dir, 'tokenization_src.json'),
                                    default_sp_model_name, source=True)
        generate_default_tok_config(os.path.join(local_model_dir, 'tokenization_tgt.json'),
                                    default_sp_model_name, source=False)
        os.rename(train_data['sp_model'], os.path.join(local_model_dir, default_sp_model_name))
        os.remove(train_data['train'])
        os.remove(train_data['dev'])
        os.remove(train_data['src_voc'])
        os.remove(train_data['tgt_voc'])
        self._storage.push(local_model_dir, args.mdir)
        shutil.rmtree(local_model_dir)
    if args.cmd == 'simapply' and local_output is not None:
        self._storage.push(local_output.name, args.output)
def exec_function(self, args):
    """Build a ``similarity.py`` command line from *args* and run it.

    For ``simtrain`` every training input (corpora, tokenizers, vocabs,
    embeddings) is first fetched to a local file; for ``simapply`` the test
    file pair is fetched and the scored output is pushed back to storage.

    NOTE(review): indentation was reconstructed from a collapsed source
    line -- confirm block nesting against the original file.
    """
    new_args = []
    local_output = None  # local temp file for the output (when not stdout)
    new_args.extend([
        '-mdir', self.convert_to_local_file([args.mdir], is_dir=True)[0],
        '-batch_size', str(args.batch_size),
        '-seed', str(args.seed)
    ])
    if args.debug:
        new_args.append('-debug')
    if args.cmd == 'simtrain':
        new_args.extend([
            '-trn', self.convert_to_local_file([args.trn])[0],
            '-dev', self.convert_to_local_file([args.dev])[0],
            '-src_tok', self.convert_to_local_file([args.src_tok])[0],
            '-src_voc', self.convert_to_local_file([args.src_voc])[0],
            '-tgt_tok', self.convert_to_local_file([args.tgt_tok])[0],
            '-tgt_voc', self.convert_to_local_file([args.tgt_voc])[0],
            '-src_emb', self.convert_to_local_file([args.src_emb])[0],
            '-tgt_emb', self.convert_to_local_file([args.tgt_emb])[0],
            '-src_emb_size', str(args.src_emb_size),
            '-tgt_emb_size', str(args.tgt_emb_size),
            '-src_lstm_size', str(args.src_lstm_size),
            '-tgt_lstm_size', str(args.tgt_lstm_size),
            '-lr', str(args.lr),
            '-lr_decay', str(args.lr_decay),
            '-lr_method', args.lr_method,
            '-aggr', args.aggr,
            '-r', str(args.r),
            '-dropout', str(args.dropout),
            '-mode', args.mode,
            '-max_sents', str(args.max_sents),
            '-n_epochs', str(args.n_epochs),
            '-report_every', str(args.report_every)
        ])
    if args.cmd == 'simapply':
        local_src_file = self.convert_to_local_file([args.tst_src])[0]
        local_tgt_file = self.convert_to_local_file([args.tst_tgt])[0]
        new_args.append('-tst')
        new_args.append(local_src_file + ',' + local_tgt_file)
        if args.epoch:
            new_args.append('-epoch')
            new_args.append(str(args.epoch))
    new_args.append('-output')
    if args.output == '-':
        # "-" means stdout: pass through unchanged.
        new_args.append(args.output)
    else:
        # Write locally first; pushed to the remote location at the end.
        local_output = tempfile.NamedTemporaryFile(delete=False)
        new_args.append(local_output.name)
    if args.q:
        new_args.append('-q')
    if args.show_matrix:
        new_args.append('-show_matrix')
    if args.show_svg:
        new_args.append('-show_svg')
    if args.show_align:
        new_args.append('-show_align')
    if args.show_last:
        new_args.append('-show_last')
    if args.show_aggr:
        new_args.append('-show_aggr')
    logger.info("command line option: %s" % " ".join(new_args))
    main(['similarity.py'] + new_args)
    if local_output is not None:
        self._storage.push(local_output.name, args.output)
def opp_cos_route():
    """Flask endpoint: score the request's 'opp' value against cos_odict."""
    opp_value = flask.request.json['opp']
    return flask.jsonify(similarity.main(opp_value, cos_odict))
def co_opps_route():
    """Flask endpoint: score the request's 'co' value against opps_odict."""
    co_value = flask.request.json['co']
    return flask.jsonify(similarity.main(co_value, opps_odict))