def main(clean_folder: str, pretrained_embedding_location: str, max_length: int, pad_token: str, unk_token: str, ready_folder: str):
    """Run the preparation pipeline from the clean data to the ready folder.

    Steps, in order:
      1. ``read.run`` loads the train/valid/test sentences and relations
         together with the pretrained embeddings.
      2. ``add_pad_unk`` is called on the embeddings with the pad and unk
         tokens.
      3. ``adjust_sentence`` is applied to each sentence split; each call
         yields an adjusted split plus a second value kept as its
         "case seqs".
      4. ``filter_unused_word`` receives the embeddings and the
         concatenation of the three adjusted sentence splits.
      5. ``adjust_relation`` is applied to each relation split.
      6. ``write.run`` receives every result along with ``ready_folder``.

    Args:
        clean_folder: Forwarded to ``read.run`` as its first argument.
        pretrained_embedding_location: Forwarded to ``read.run`` as its
            second argument.
        max_length: Passed to ``adjust_sentence`` and ``adjust_relation``.
        pad_token: Passed to ``add_pad_unk`` and ``adjust_sentence``.
        unk_token: Passed to ``add_pad_unk`` and ``adjust_sentence``.
        ready_folder: Forwarded to ``write.run`` as its last argument.
    """
    # Load every split plus the embeddings in one call.
    loaded = read.run(clean_folder, pretrained_embedding_location)
    (raw_train_x, raw_train_y,
     raw_valid_x, raw_valid_y,
     raw_test_x, raw_test_y,
     embeddings) = loaded

    # x side: sentences.
    add_pad_unk(embeddings, pad_token, unk_token)
    adjusted_pairs = []
    for raw_split in (raw_train_x, raw_valid_x, raw_test_x):
        adjusted_split, case_seqs = adjust_sentence(
            raw_split, max_length, pad_token, embeddings, unk_token)
        adjusted_pairs.append((adjusted_split, case_seqs))
    (train_x, train_cases), (valid_x, valid_cases), (test_x, test_cases) = adjusted_pairs

    # Filter unused words, based on all adjusted sentences combined.
    every_sentence = train_x + valid_x + test_x
    word_lookup, word_embedding = filter_unused_word(embeddings, every_sentence)

    # y side: relations (same train, valid, test order as the sentences).
    train_y, valid_y, test_y = [
        adjust_relation(raw_split, max_length)
        for raw_split in (raw_train_y, raw_valid_y, raw_test_y)
    ]

    # Write sentences and relations.
    write.run(
        train_x, valid_x, test_x,
        word_lookup, word_embedding,
        train_cases, valid_cases, test_cases,
        train_y, valid_y, test_y,
        ready_folder,
    )
# Run the signal check, then the optional cross-validation, plot and write
# stages selected by flags on `cfg`, and finally report completion.
# NOTE(review): `cfg`, `time` and `pl` are not defined in this fragment; they
# are presumably set up earlier in the script -- confirm.
import checksignal as cs

# Every stage is keyed by the same tag: cfg.name followed by cfg.maxdepth.
run_tag = cfg.name + str(cfg.maxdepth)

# Forward quick=True only when it was requested, so each stage's own default
# applies otherwise. The explicit `== True` comparisons are kept on purpose:
# a merely truthy value (e.g. a non-empty string) must not enable a flag.
quick_kwargs = {"quick": True} if cfg.quick == True else {}

cs.run(run_tag, **quick_kwargs)

if cfg.crossvalidation == True:
    import crossvalidation as cv
    cv.run(run_tag, **quick_kwargs)

if cfg.plot == True:
    import plot as p
    p.run(run_tag, int(cfg.bins), **quick_kwargs)

if cfg.write == True:
    import write as w
    # The write stage is refused in quick mode.
    if cfg.quick == True:
        raise Exception("Requires full dataset")
    w.run(run_tag, cfg.source)

end = time.time()
# A single pre-formatted argument prints the same text on Python 2 and 3;
# the former `print a, b` statement is a SyntaxError on Python 3.
print("%s Code Ended" % time.asctime(time.localtime()))
pl.show()
# Run the signal check, then the optional cross-validation, plot and write
# stages selected by flags on `cfg`, and finally report completion.
# NOTE(review): `cfg`, `time` and `pl` are not defined in this fragment; they
# are presumably set up earlier in the script -- confirm.
import checksignal as cs

# Every stage is keyed by the same tag: cfg.name followed by cfg.maxdepth.
run_tag = cfg.name + str(cfg.maxdepth)

# Forward quick=True only when it was requested, so each stage's own default
# applies otherwise. The explicit `== True` comparisons are kept on purpose:
# a merely truthy value (e.g. a non-empty string) must not enable a flag.
quick_kwargs = {"quick": True} if cfg.quick == True else {}

cs.run(run_tag, **quick_kwargs)

if cfg.crossvalidation == True:
    import crossvalidation as cv
    cv.run(run_tag, **quick_kwargs)

if cfg.plot == True:
    import plot as p
    p.run(run_tag, int(cfg.bins), **quick_kwargs)

if cfg.write == True:
    import write as w
    # The write stage is refused in quick mode.
    if cfg.quick == True:
        raise Exception("Requires full dataset")
    w.run(run_tag, cfg.source)

end = time.time()
# A single pre-formatted argument prints the same text on Python 2 and 3;
# the former `print a, b` statement is a SyntaxError on Python 3.
print("%s Code Ended" % time.asctime(time.localtime()))
pl.show()
def write(self):
    """Call ``write.run()`` with no arguments and discard its result.

    ``write`` inside the body is looked up in the enclosing (non-class)
    scope, not on ``self``.
    """
    write.run()