def reload_init(self, first_time, users):
    try:
        try:
            self.Config = utils.Config("debug.json")
        except FileNotFoundError:
            self.Config = utils.Config("config.json")
    except FileNotFoundError:
        self.Config = utils.Config("config.json.template")
    self.nickname = self.Config().main.nick
    self.password = self.Config().osu.irc if self.Config().osu.irc else None
    self.UPDATE_MSG = self.Config().main.update_msg
    self.FIRST_TIME_MSG = self.Config().main.first_time_msg
    self.user_pref = utils.Utils.create_sqlite_dict("./userpref.db", "userpref")
    self.recommend = utils.Utils.create_sqlite_dict("./recommend.db", "recommend")
    self.start_time = first_time
    self.users = users
    self.Commands = utils.Commands(self, self.Config)
    self.command_funcs = [
        func for func in dir(utils.Commands)
        if callable(getattr(utils.Commands, func)) and not func.startswith("_")
    ]
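# NOTE: a minimal standalone sketch (not part of the original module) of the
# config-fallback pattern used in reload_init above, assuming the config files
# are plain JSON; utils.Config is replaced by json.load for illustration only.
import json

def load_first_available(paths=("debug.json", "config.json",
                                "config.json.template")):
    """Return the parsed contents of the first config file that exists."""
    for path in paths:
        try:
            with open(path) as f:
                return json.load(f)
        except FileNotFoundError:
            continue
    raise FileNotFoundError("No config file found among: %s" % (paths,))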
def main(gpu, path_model, path_corpus, path_config, path_target, path_program,
         path_json, path_img, win_size, path_word2vec):
    MAX_LENGTH = 70
    config = utils.Config(path_config)
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    batch_size = config.getint("batch_size")
    # print statements parenthesized so the code runs under Python 2 and 3
    print("[info] CONFIG: %s" % path_config)
    print("[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec)
    print("[info] LOADED MODEL: %s" % path_model)
    print("[info] WORD DIM: %d" % word_dim)
    print("[info] STATE DIM: %d" % state_dim)
    print("[info] BATCH SIZE: %d" % batch_size)
    sents_train, sents_val, vocab, ivocab = \
        utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)
    cuda.get_device(gpu).use()
    model = utils.load_cxt_model(path_model, path_config, vocab)
    model.to_gpu(gpu)
    sents = parse(vocab, path_target)
    probs = inspect(model, sents)
    words, probs = aggregate(sents, probs, vocab, ivocab, win_size)
    tokens = [ivocab[w] for w in words]
    prob_dist, grid_text = collate(tokens, probs, path_program)
    generate_json(prob_dist, 0.05, path_program, path_json)
    draw_heatmap(np.array(prob_dist), grid_text, path_img)
def get_rstdt_wsj_filenames():
    config = utils.Config()
    filenames = []
    for split in ["train", "test"]:
        filenames_ = os.listdir(
            os.path.join(config.getpath("data"), "rstdt", "wsj", split))
        filenames_ = [n for n in filenames_ if n.endswith(".edus")]
        filenames.extend(filenames_)
    assert len(filenames) == 385
    # Five documents use placeholder names; map them back to their WSJ IDs.
    for name in ["file1.edus", "file2.edus", "file3.edus",
                 "file4.edus", "file5.edus"]:
        filenames.remove(name)
    filenames = [n[:-len(".out.edus")] for n in filenames]
    filenames.append("wsj_0764")  # "file1"
    filenames.append("wsj_0430")  # "file2"
    filenames.append("wsj_0766")  # "file3"
    filenames.append("wsj_0778")  # "file4"
    filenames.append("wsj_2172")  # "file5"
    return filenames
def main():
    config = utils.Config()
    path_out = os.path.join(config.getpath("data"), "aarc_abst")
    utils.mkdir(path_out)
    filenames = os.listdir(config.getpath("aarc"))
    filenames = [n for n in filenames if n.endswith(".txt.utf8")]
    filenames.sort()
    nlp = spacy.load("en_core_web_sm",
                     disable=["tagger", "parser", "ner", "textcat"])
    cnt = 0
    for filename in pyprind.prog_bar(filenames):
        text = extract_abstract(os.path.join(config.getpath("aarc"), filename))
        if text == "":
            # print("No Abstract!: %s" % filename)
            continue
        with open(os.path.join(path_out,
                               filename.replace(".txt.utf8", ".doc.tokens")),
                  "w") as f:
            doc = nlp(text)
            tokens = [token.text for token in doc]
            assert len(tokens) > 0
            tokens = " ".join(tokens)
            f.write("%s\n" % tokens)
        cnt += 1
    print("Processed %d/%d files" % (cnt, len(filenames)))
def main(args):
    config = utils.Config()
    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()
    with open(os.path.join(config.getpath("data"), "rstdt",
                           "tmp.preprocessing", "concat.edus.heads.deprel"),
              "w") as f:
        for filename in filenames:
            deprels = utils.read_lines(
                os.path.join(config.getpath("data"), "rstdt", "renamed",
                             filename + ".heads"),
                process=lambda line: line.split()[-1])
            for deprel in deprels:
                f.write("%s\n" % deprel)
    if args.with_root:
        special_words = ["<root>"]
    else:
        special_words = []
    textpreprocessor.create_vocabulary.run(
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.heads.deprel"),
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "deprels.vocab.txt"),
        prune_at=10000000,
        min_count=-1,
        special_words=special_words,
        with_unk=True)
def main(args):
    config = utils.Config()
    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()
    filepaths = [
        os.path.join(config.getpath("data"), "rstdt", "renamed",
                     filename + ".postags")
        for filename in filenames
    ]
    # Concat
    textpreprocessor.concat.run(
        filepaths,
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.postags"))
    # Build vocabulary
    if args.with_root:
        special_words = ["<root>"]
    else:
        special_words = []
    textpreprocessor.create_vocabulary.run(
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.postags"),
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "postags.vocab.txt"),
        prune_at=50000,
        min_count=-1,
        special_words=special_words,
        with_unk=False)
def main():
    config = utils.Config()
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented"))
    filenames = [n for n in filenames if n.endswith(".txt")]
    filenames.sort()
    utils.mkdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed"))
    for filename in pyprind.prog_bar(filenames):
        path_seg = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "segmented", filename)
        path_raw = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "raw", filename)
        path_dst = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed",
                                filename.replace(".txt", ".edus"))
        # Input
        edus = utils.read_lines(path_seg, process=lambda line: line)
        edus = remove_empty_lines(filename, edus)
        raw_lines = utils.read_lines(path_raw, process=lambda line: line)
        raw_lines = remove_empty_lines(filename, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Processing
        edus = convert_edus(edus, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Output
        utils.write_lines(path_dst, edus)
def main():
    config = utils.Config()
    filename_map = read_filename_map(
        os.path.join(config.getpath("data"), "rstdt", "renamed",
                     "filename_map.txt"))
    with open(os.path.join(config.getpath("data"), "rstdt",
                           "tmp.preprocessing",
                           "filelist.ptbtokenizer2.txt"), "w") as f:
        for filename_wsj in filename_map.keys():
            filename_ren = filename_map[filename_wsj]
            if filename_ren.startswith("train"):
                split = "TRAINING"
            elif filename_ren.startswith("test"):
                split = "TEST"
            else:
                raise ValueError("filename_ren=%s" % filename_ren)
            path_in = os.path.join(config.getpath("rstdt"), split,
                                   filename_wsj.replace(".edus", ""))
            path_out = os.path.join(config.getpath("data"), "rstdt",
                                    "tmp.preprocessing",
                                    filename_ren.replace(".edus",
                                                         ".raw.tokenized"))
            f.write("%s\t%s\n" % (path_in, path_out))
def train_model():
    tf.reset_default_graph()
    config_model = utils.Config()  # fixed typo: was "ultils.Config()"
    model_train = model.Model(config_model, 0.5)
    # tf.initialize_all_variables() is deprecated in TF1
    init = tf.global_variables_initializer()
    loss_history = []
    save_file = (path + '/weight_model/' + model_train.config.model_name +
                 '/train_model.ckpt')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        if os.path.exists(save_file + '.index'):
            saver.restore(sess, save_file)
        else:
            sess.run(init)
        for epoch in range(config_model.max_epochs):
            arrangement = np.arange(config_model.total_train_img)
            np.random.shuffle(arrangement)
            print('Epoch : ' + str(epoch + 1))
            total_loss = model_train.run_epoch(
                sess, model_train.config.batch_size, arrangement)
            loss_history.extend(total_loss)
            print('Mean loss: %.1f' % np.mean(total_loss))
            if epoch % 1 == 0:  # save every epoch
                save_f = (path + '/weight_model/' +
                          model_train.config.model_name + 'epoch_' +
                          str(epoch) + '/train_model.ckpt')
                saver.save(sess, save_f)
                print('Model Saved.')
def main():
    config = utils.Config()
    for split in ["train", "test"]:
        dataset = dataloader.read_rstdt(split=split,
                                        relation_level="coarse-grained",
                                        with_root=False)
        with open(os.path.join(config.getpath("data"), "rstdt", "wsj", split,
                               "gold.labeled.nary.ctrees"), "w") as f:
            for data in dataset:
                f.write("%s\n" % " ".join(data.nary_sexp))
        with open(os.path.join(config.getpath("data"), "rstdt", "wsj", split,
                               "gold.labeled.bin.ctrees"), "w") as f:
            for data in dataset:
                f.write("%s\n" % " ".join(data.bin_sexp))
        with open(os.path.join(config.getpath("data"), "rstdt", "wsj", split,
                               "gold.arcs"), "w") as f:
            for data in dataset:
                arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
                f.write("%s\n" % " ".join(arcs))
def main():
    config = utils.Config()
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()
    for file_i, filename in enumerate(filenames):
        path_tok = os.path.join(config.getpath("data"), "rstdt",
                                "tmp.preprocessing", filename + ".tokenized")
        path_tok2 = os.path.join(config.getpath("data"), "rstdt",
                                 "tmp.preprocessing",
                                 filename.replace(".edus", ".raw.tokenized"))
        path_out = os.path.join(config.getpath("data"), "rstdt",
                                "tmp.preprocessing",
                                filename.replace(".edus",
                                                 ".paragraph.boundaries"))
        try:
            bnds = get_paragraph_boundaries(path_tok, path_tok2)
        except Exception:  # was a bare except
            print("Error occurred when comparing %s and %s"
                  % (path_tok, path_tok2))
            sys.exit(-1)
        write_boundaries(bnds, path_out)
def main():
    config = utils.Config()
    # On-board LED
    led = machine.Pin(2, machine.Pin.OUT, machine.Pin.PULL_UP)
    sensor = dht.DHT22(machine.Pin(config.get("dht_gpio", 4)))
    # hexlify() returns bytes; decode to get a str client id
    # (the original called .format(), which bytes does not support)
    client_id = "esp8266_" + ubinascii.hexlify(machine.unique_id()).decode()
    client = MQTTClient(client_id, config.get("mqtt_broker"),
                        config.get("mqtt_port"), config.get("mqtt_user"),
                        config.get("mqtt_passwd"))
    try:
        client.connect()
    except OSError:
        # Just report and continue, since publish will try to reconnect
        print("Error when connecting to the MQTT broker")
    else:
        print("Connected to {}".format(config.get("mqtt_broker")))
    # Iterate and publish
    while True:
        sensor.measure()
        led.low()
        client.publish("{}/temperature".format(config.get("mqtt_topic")),
                       str(sensor.temperature()))
        client.publish("{}/humidity".format(config.get("mqtt_topic")),
                       str(sensor.humidity()))
        led.high()
        time.sleep(5)
def main():
    config = utils.Config()
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()
    n_skipped = 0
    for file_i, filename in enumerate(filenames):
        path_tok = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "tmp.preprocessing", filename + ".tokenized")
        path_tok2 = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                 "tmp.preprocessing",
                                 filename.replace(".edus", ".txt") + ".tokenized")
        path_out = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "tmp.preprocessing",
                                filename.replace(".edus",
                                                 ".paragraph.boundaries"))
        bnds = None
        try:
            bnds = get_paragraph_boundaries(path_tok, path_tok2)
        except Exception:  # was a bare except
            print("Skipped %s" % path_tok)
            n_skipped += 1
        if bnds is not None:
            write_boundaries(bnds, path_out)
    print("Skipped %d files." % n_skipped)
def __init__(self, *, pool: asyncpg.pool.Pool, **kwargs):
    super().__init__(command_prefix=_get_prefix,
                     description=config.description,
                     pm_help=None,
                     game=discord.Game(name=config.game),
                     **kwargs)
    self.all_commands = utils.CaseInsensitiveDict(self.all_commands)
    self.pool = pool
    self.prefixes = utils.Config('prefixes.json', loop=self.loop)
    self.session = aiohttp.ClientSession(loop=self.loop)
    self.lavalink = lavalink.Client(bot=self,
                                    loop=self.loop,
                                    host=config.lava_host,
                                    rest_port=config.lava_rest,
                                    ws_port=config.lava_ws,
                                    password=config.lava_pass,
                                    player=music.Player)
    self.process = psutil.Process()
    for extension in self.startup_extensions:
        try:
            self.load_extension(extension)
        except Exception:
            print(f'Failed to load extension {extension}')
            traceback.print_exc()
    self.loop.create_task(self.init())
def __init__(self):
    self.config = utils.Config('./config.yml')
    self.device = None
    self.summary = {}
    self.dumpPath = None
    self.sysConfig()
    self.setSummary()
    self.pipeRaw = self.loadDataset()
    self.pipeLen = len(self.pipeRaw['train'])
    self.pipe = None
    self.pipeIter()
    self.gen = model.Generator(self.config)
    self.dis = model.Discriminator(self.config)
    if self.config.GPU == -1 and self.config.CUDA:
        print('Using MultiGPU')
        self.gen = nn.parallel.DataParallel(self.gen).to(self.device)
        # NOTE: the original only wrapped the generator; the discriminator
        # presumably needs the same treatment or it stays on the CPU.
        self.dis = nn.parallel.DataParallel(self.dis).to(self.device)
    else:
        self.gen = self.gen.to(self.device)
        self.dis = self.dis.to(self.device)
    # LR is stored as a string (e.g. "2e-4") in the YAML config, hence eval()
    self.optGen = torch.optim.Adam(self.gen.parameters(),
                                   lr=eval(self.config.LR),
                                   betas=self.config.BETA)
    self.optDis = torch.optim.Adam(self.dis.parameters(),
                                   lr=eval(self.config.LR),
                                   betas=self.config.BETA)
def main():
    config = utils.Config()
    path_corpus_train = config.getpath("prep_corpus") + ".train"
    path_corpus_val = config.getpath("prep_corpus") + ".val"
    corpus_train = load_corpus(path_corpus_train,
                               vocab=path_corpus_train + ".vocab",
                               max_length=1000000000)
    corpus_val = load_corpus(path_corpus_val,
                             vocab=corpus_train.vocab,
                             max_length=1000000000)
    count = np.zeros((101,))
    count = count_sentence_length(corpus_train, count=count)
    count = count_sentence_length(corpus_val, count=count)
    diff = len(corpus_train) + len(corpus_val) - count.sum()
    utils.logger.debug(
        "[info] Excluded %d sentences of length longer than %d"
        % (diff, len(count) - 1))
    path_out = config.getpath("prep_corpus") + ".histogram.npy"
    np.save(path_out, count)
    plot_histogram(count)
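# NOTE: a plausible standalone sketch of the count_sentence_length helper
# assumed above (not the original implementation): sentences longer than the
# last bin are simply skipped, which is why "diff" above equals the number of
# excluded sentences.
import numpy as np

def count_sentence_length(corpus, count):
    """Accumulate a histogram of sentence lengths into ``count``."""
    for sentence in corpus:  # each sentence is a list of tokens
        length = len(sentence)
        if length < len(count):
            count[length] += 1
    return count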
def close(self):
    """
    Closes the file and returns a URI with the final address of the file.

    If save_metadata is set, the file metadata is saved in the DHT.
    You can always access the metadata with File.metadata
    """
    if self.closed:
        return
    logger.info('Closing %s' % self.uri.get_readable())
    if self.mode == 'w':
        self.flush(True)
        self.metadata.set('Main:parts', len(self.parts))
        self.metadata.set('Main:length', self.filelength)
        self.metadata.set('Main:hash', self.hasher.hexdigest())
        self.metadata.set('Main:p', '')
        if self.save_metadata:
            # variables used to chain metadata blocks
            puri = self.uri
            pmeta = self.metadata
            not_saved = True
            # crypter used to encrypt the metadata. There is always
            # a crypter to protect against casual attackers, but
            # if there is no Kf the crypter is nearly useless
            if SECURED:
                if self.keys[4]:
                    mdencrypter = AES.new(self.keys[4], AES.MODE_CBC,
                                          self.uri.get_hd())
                else:
                    mdencrypter = AES.new(self.uri.get_hd(), AES.MODE_CBC,
                                          self.uri.get_hd())
            else:
                mdencrypter = DummyEncrypter()
            for i in range(0, len(self.parts)):
                pmeta.set('Part:%d' % i, self.parts[i])
                # chain the metadata blocks, each block only with
                # DESC_PER_METAPART references to parts of the file
                if (i < len(self.parts) - 1 and
                        i % self.DESC_PER_METAPART == self.DESC_PER_METAPART - 1):
                    nuri = URI(self.uri.uid, utils.random_nick(), '', self.keys)
                    nuri.hd = utils.random_string(16, False)
                    pmeta.set('Main:n', nuri.get_static())
                    m = pmeta.save()
                    pmeta.set('Main:p',
                              utils.random_string(self.BLOCK_SIZE - len(m)))
                    m = mdencrypter.encrypt(pmeta.save())
                    dfs.dht.put(puri.get_hd(), m, puri.nick)
                    pmeta = utils.Config()
                    pmeta.set('Main:p', '')
                    puri = nuri
                    not_saved = False
                else:
                    not_saved = True
            if not_saved:
                m = pmeta.save()
                pmeta.set('Main:p',
                          utils.random_string(self.BLOCK_SIZE - len(m)))
                m = mdencrypter.encrypt(pmeta.save())
                dfs.dht.put(puri.get_hd(), m, puri.nick)
            # Create the final metadata
            for i in range(0, len(self.parts)):
                self.metadata.set('Part:%d' % i, self.parts[i])
    else:
        # In read mode, free the buffer
        self.buffer = None
    self.closed = True
    return self.uri
def main():
    config = utils.Config()
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()
    n_skipped = 0
    for file_i, filename in enumerate(filenames):
        path_s = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                              "tmp.preprocessing",
                              filename.replace(".edus", ".sentence.boundaries"))
        path_p = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                              "tmp.preprocessing",
                              filename.replace(".edus", ".paragraph.boundaries"))
        if not os.path.exists(path_s):
            print("Skipped %s because %s doesn't exist." % (filename, path_s))
            n_skipped += 1
            continue
        if not os.path.exists(path_p):
            print("Skipped %s because %s doesn't exist." % (filename, path_p))
            n_skipped += 1
            continue
        sbnds = utils.read_lines(
            path_s, process=lambda l: tuple([int(x) for x in l.split()]))
        pbnds = utils.read_lines(
            path_p, process=lambda l: tuple([int(x) for x in l.split()]))
        sbnds_proj, n_edus = project_pbnds_to_sbnds(sbnds=sbnds, pbnds=pbnds)
        if sbnds != sbnds_proj:
            print("Projected paragraph boundaries into the sentence boundaries (+%d): %s"
                  % (len(sbnds_proj) - len(sbnds), path_s))
        test_boundaries(sbnds_proj, n_edus)
        pbnds = replace_subtrees_with_ids(sbnds=sbnds_proj, pbnds=pbnds)
        write_boundaries(
            sbnds,
            os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                         "preprocessed",
                         filename.replace(".edus",
                                          ".sentence.noproj.boundaries")))
        write_boundaries(
            sbnds_proj,
            os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                         "preprocessed",
                         filename.replace(".edus",
                                          ".sentence.proj.boundaries")))
        write_boundaries(
            pbnds,
            os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                         "preprocessed",
                         filename.replace(".edus", ".paragraph.boundaries")))
    print("Skipped %d files." % n_skipped)
def __init__(self):
    self.config = utils.Config()
    super().__init__(
        command_prefix=get_prefix,
        description="A bot that does simple yet marvellous things!",
        case_insensitive=self.config.case_insensitive)
    self.remove_command("help")
def main():
    config = utils.Config()
    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))
    filenames = []
    for split in ["train", "test"]:
        dirpath = os.path.join(config.getpath("data"), "rstdt", "wsj", split)
        for filename in os.listdir(dirpath):
            filenames.append(os.path.join(dirpath, filename))
    filenames = [n for n in filenames if n.endswith(".labeled.bin.ctree")]
    filenames.sort()
    relation_mapper = treetk.rstdt.RelationMapper()
    frelations = []
    crelations = []
    nuclearities = []
    for filename in pyprind.prog_bar(filenames):
        sexp = utils.read_lines(filename, process=lambda line: line)
        sexp = treetk.preprocess(sexp)
        tree = treetk.rstdt.postprocess(
            treetk.sexp2tree(sexp, with_nonterminal_labels=True,
                             with_terminal_labels=False))
        nodes = treetk.traverse(tree, order="pre-order",
                                include_terminal=False, acc=None)
        part_frelations = []
        part_crelations = []
        part_nuclearities = []
        for node in nodes:
            relations_ = node.relation_label.split("/")
            part_frelations.extend(relations_)
            part_crelations.extend([relation_mapper.f2c(r) for r in relations_])
            part_nuclearities.append(node.nuclearity_label)
        part_frelations.append("<root>")
        part_crelations.append("<root>")
        frelations.append(part_frelations)
        crelations.append(part_crelations)
        nuclearities.append(part_nuclearities)
    fcounter = utils.get_word_counter(lines=frelations)
    ccounter = utils.get_word_counter(lines=crelations)
    ncounter = utils.get_word_counter(lines=nuclearities)
    frelations = fcounter.most_common()  # list of (str, int)
    crelations = ccounter.most_common()  # list of (str, int)
    nuclearities = ncounter.most_common()  # list of (str, int)
    utils.write_vocab(os.path.join(config.getpath("data"), "rstdt-vocab",
                                   "relations.fine.vocab.txt"), frelations)
    utils.write_vocab(os.path.join(config.getpath("data"), "rstdt-vocab",
                                   "relations.coarse.vocab.txt"), crelations)
    utils.write_vocab(os.path.join(config.getpath("data"), "rstdt-vocab",
                                   "nuclearities.vocab.txt"), nuclearities)
def main():
    config = utils.Config()
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".paragraph.boundaries")]
    filenames = [n.replace(".paragraph.boundaries", ".edus") for n in filenames]
    filenames.sort()
    for filename in filenames:
        # Paths
        path_edus = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                 "tmp.preprocessing", filename + ".tokenized")
        path_conll = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                  "tmp.preprocessing",
                                  filename.replace(".edus", ".sentences.conll"))
        path_out = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed", filename + ".postags")
        # Read
        edus = utils.read_lines(
            path_edus, process=lambda line: line.split())  # list of list of str
        tokens_e = utils.flatten_lists(edus)  # list of str
        sentences = utils.read_conll(
            path_conll,
            keys=["ID", "FORM", "LEMMA", "POSTAG",
                  "_1", "HEAD", "DEPREL"])  # list of list of {str: str}
        conll_lines = utils.flatten_lists(sentences)  # list of {str: str}
        tokens_s = [conll_line["FORM"] for conll_line in conll_lines]
        postags_s = [conll_line["POSTAG"] for conll_line in conll_lines]
        # Check that the EDU tokens and the CoNLL tokens line up one-to-one
        for token_e, token_s, postag_s in zip(tokens_e, tokens_s, postags_s):
            if token_e != token_s:
                raise ValueError("Error! %s != %s" % (token_e, token_s))
        # Create the POSTAG version of the EDUs
        postag_i = 0
        edus_postag = []
        for edu in edus:
            edu_postag = [postags_s[postag_i + i] for i in range(len(edu))]
            edus_postag.append(edu_postag)
            postag_i += len(edu)
        # Write
        with open(path_out, "w") as f:
            for edu_postag in edus_postag:
                f.write("%s\n" % " ".join(edu_postag))
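# NOTE: a minimal self-contained illustration (not from the original code) of
# the regrouping step above: POS tags arrive as one flat, sentence-ordered
# list and are re-chunked so each piece mirrors the EDU segmentation.
def regroup_by_lengths(flat_tags, chunks):
    """Split ``flat_tags`` into pieces whose lengths match ``chunks``."""
    out, i = [], 0
    for chunk in chunks:
        out.append(flat_tags[i:i + len(chunk)])
        i += len(chunk)
    assert i == len(flat_tags)
    return out

# e.g. regroup_by_lengths(["DT", "NN", "VBZ"], [["the", "cat"], ["sits"]])
# -> [["DT", "NN"], ["VBZ"]]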
def __init__(self, ps, **kw):
    kw.setdefault('name', qu.to_snake_case(type(self).__name__))
    kw.setdefault('dtype', tf.float32)
    super().__init__(**kw)
    if isinstance(ps, qu.Config):
        self.cfg = ps
    else:
        self.cfg = qu.Config(**dict(self.cfg_items(ps)))
    cfg = self.cfg
    if cfg.initer_stddev:
        self.initer = ki.TruncatedNormal(stddev=cfg.initer_stddev)
def main():
    config = utils.Config()
    # Write gold dependency arcs for each split/annotation combination;
    # an empty sub_dir means there is no sub-directory (the "train" split).
    for split, sub_dir in [("train", ""),
                           ("dev", "gold"),
                           ("dev", "second_annotate"),
                           ("test", "gold"),
                           ("test", "second_annotate")]:
        dataset = dataloader.read_scidtb(split=split,
                                         sub_dir=sub_dir,
                                         relation_level="coarse-grained")
        path_out = os.path.join(config.getpath("data"), "scidtb",
                                "preprocessed", split, sub_dir, "gold.arcs")
        with open(path_out, "w") as f:
            for data in dataset:
                arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
                f.write("%s\n" % " ".join(arcs))
def main():
    config = utils.Config()
    utils.mkdir(os.path.join(config.getpath("data"), "scidtb-vocab"))
    relation_mapper = treetk.rstdt.RelationMapper(corpus_name="scidtb")
    filenames = []
    for sub_path in [("train",),
                     ("dev", "gold"),
                     ("dev", "second_annotate"),
                     ("test", "gold"),
                     ("test", "second_annotate")]:
        dirpath = os.path.join(config.getpath("data"), "scidtb",
                               "preprocessed", *sub_path)
        for filename in os.listdir(dirpath):
            filenames.append(os.path.join(dirpath, filename))
    filenames = [n for n in filenames if n.endswith(".edus.tokens")]
    filenames.sort()
    tmp_f_path = os.path.join(config.getpath("data"), "scidtb-vocab",
                              "tmp_f.txt")
    tmp_c_path = os.path.join(config.getpath("data"), "scidtb-vocab",
                              "tmp_c.txt")
    with open(tmp_f_path, "w") as ff, open(tmp_c_path, "w") as fc:
        for filename in filenames:
            lines = utils.read_lines(filename.replace(".edus.tokens", ".arcs"),
                                     process=lambda line: line.split())
            assert len(lines) == 1
            line = lines[0]
            arcs = treetk.hyphens2arcs(line)
            fine_relations = [l for h, d, l in arcs]
            coarse_relations = [relation_mapper.f2c(l) for l in fine_relations]
            ff.write("%s\n" % " ".join(fine_relations))
            fc.write("%s\n" % " ".join(coarse_relations))
    utils.build_vocabulary(
        paths_file=[tmp_f_path],
        path_vocab=os.path.join(config.getpath("data"), "scidtb-vocab",
                                "relations.fine.vocab.txt"),
        prune_at=50000,
        min_count=-1,
        special_words=["<root>"],
        with_unk=False)
    utils.build_vocabulary(
        paths_file=[tmp_c_path],
        path_vocab=os.path.join(config.getpath("data"), "scidtb-vocab",
                                "relations.coarse.vocab.txt"),
        prune_at=50000,
        min_count=-1,
        special_words=["<root>"],
        with_unk=False)
def __init__(self, first_time, users, channel=None):
    self.channel = channel
    # region reload_init
    try:
        try:
            self.Config = utils.Config("debug.json")
        except FileNotFoundError:
            self.Config = utils.Config("config.json")
    except FileNotFoundError:
        self.Config = utils.Config("config.json.template")
    self.nickname = self.Config().main.nick
    self.password = self.Config().osu.irc if self.Config().osu.irc else None
    self.UPDATE_MSG = self.Config().main.update_msg
    self.FIRST_TIME_MSG = self.Config().main.first_time_msg
    self.user_pref = utils.Utils.create_sqlite_dict("./userpref.db", "userpref")
    self.recommend = utils.Utils.create_sqlite_dict("./recommend.db", "recommend")
    self.start_time = first_time
    self.users = users
    self.Commands = utils.Commands(self, self.Config)
    self.command_funcs = [
        func for func in dir(utils.Commands)
        if callable(getattr(utils.Commands, func)) and not func.startswith("_")
    ]
    # endregion
    logger.debug("Trying nickname " + self.nickname)
    logger.debug("On server " + self.Config().main.server)
    logger.debug("Using password " +
                 (self.password if self.password is not None else "\"None\""))
def generate_new_caption(img_feat):
    tf.reset_default_graph()
    config = utils.Config()
    model_gen = model.Model(config, 1)
    save_file = (path + '/weight_model/' + model_gen.config.model_name +
                 '/train_model.ckpt')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, save_file)
        captions = model_gen.generate_caption(sess, img_feat)[0:4]
        for caption in captions:
            print(utils.listToSentence(caption['sen']),
                  'score: ', caption['score'])
        return captions
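# NOTE: hypothetical usage sketch; "extract_img_feature" and the image file
# are illustrative stand-ins, not part of the original code.
#
#     img_feat = extract_img_feature('example.jpg')
#     captions = generate_new_caption(img_feat)   # top-4 beams, best first
#     best_sentence = captions[0]['sen']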
async def reload(self, ctx):
    """Reloads the config file."""
    del self.bot.config
    self.bot.config = utils.Config()
    await utils.embed(
        ctx,
        discord.Embed(
            timestamp=datetime.utcnow(),
            title="Config Reloaded",
            description="All config data has been successfully reloaded."))
def main():
    config = utils.Config()
    process1(os.path.join(config.getpath("rstdt"), "TRAINING"),
             os.path.join(config.getpath("data"), "rstdt", "wsj", "train"))
    process1(os.path.join(config.getpath("rstdt"), "TEST"),
             os.path.join(config.getpath("data"), "rstdt", "wsj", "test"))
    process2(os.path.join(config.getpath("data"), "rstdt", "wsj", "train"),
             os.path.join(config.getpath("data"), "rstdt", "renamed"),
             split="train")
    process2(os.path.join(config.getpath("data"), "rstdt", "wsj", "test"),
             os.path.join(config.getpath("data"), "rstdt", "renamed"),
             split="test")
def init_default_conf():
    """
    Sets up a default configuration for DFS: uses $HOME/.dfs as the config
    dir, reads the default configuration from $HOME/.dfs/dfsrc, and sets up
    the logging system to use the file dfs.log in the configuration directory
    """
    global default_config_dir, default_config, default_log_file, \
        default_config_file, dht
    # Create the default config directory if it does not exist
    default_config_dir = os.path.expanduser('~%s.dfs' % os.path.sep)
    if not os.path.exists(default_config_dir):
        os.mkdir(default_config_dir)
    default_config_file = default_config_dir + os.path.sep + 'dfsrc'
    default_log_file = default_config_dir + os.path.sep + 'dfs.log'
    # Load the default config file
    if not os.path.exists(default_config_file):
        open(default_config_file, 'w').close()
    default_config = utils.Config()
    default_config.load(open(default_config_file, 'r'))
    # Configure the logging system
    utils.configure_logging(
        level=logging.INFO,
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        datefmt='%H:%M:%S',
        filename=default_log_file,
        filemode='w')
    logging.info('Default configuration: %s' % default_config_file)
    # Set default configuration values, if not already set
    changed = False
    if not default_config.get('DHT:datadir'):
        default_config.set('DHT:datadir',
                           default_config_dir + os.path.sep + 'dhtdata')
        changed = True
    # key name normalized: the original checked 'Main:UID' but set 'Main:uid',
    # and assigned "changes" instead of "changed", so the UID never persisted
    if not default_config.get('Main:uid'):
        default_config.set('Main:uid', utils.random_string(16))
        changed = True
    if not default_config.get('Main:nick'):
        default_config.set('Main:nick', utils.random_nick())
        changed = True
    if not default_config.get('Keys:kf'):
        logging.warning('There is no file key')
    if not default_config.get('Keys:kd'):
        logging.warning('There is no description key')
    if changed:
        default_config.save(open(default_config_file, 'w'))
    # Default DHT: a local DHT
    dht = DHT.LocalDHT(default_config)
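# NOTE: a usage sketch of the utils.Config API exactly as exercised above
# (load/get/set/save with "Section:key" names); the file contents here are
# illustrative only.
cfg = utils.Config()
cfg.load(open('dfsrc', 'r'))       # read an existing rc file
nick = cfg.get('Main:nick')        # falsy when the key is unset
if not nick:
    cfg.set('Main:nick', 'anonymous')
    cfg.save(open('dfsrc', 'w'))   # persist the updated configuration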
def main():
    config = utils.Config()
    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()
    for filename in filenames:
        textpreprocessor.replace_digits.run(
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         filename + ".tokenized.lowercased"),
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         filename + ".tokenized.lowercased.replace_digits"))