def write_info(info_file, mallet_files, t1):
    """Write a run summary to info_file.

    The summary holds the command line (from sys.argv), the current
    timestamp, the whole seconds elapsed since t1 (a time.time() value),
    the value returned by get_git_commit(), and one "source" line for each
    entry in mallet_files.
    """
    # The context manager guarantees the handle is flushed and closed, also
    # when one of the writes raises.
    with open(info_file, 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        fh.write("seconds elapsed   =  %d\n" % int(time.time() - t1))
        fh.write("git_commit        =  %s\n\n" % get_git_commit())
        for f in mallet_files:
            fh.write("source            =  %s\n" % f)
Ejemplo n.º 2
0
def _write_info(source_file, out_file1, out_file2, t1, t2):
    """Write a '<out_file>.info' summary next to each of the two output files.

    Each summary lists the command line, the absolute paths of source_file
    and of the output file it belongs to, the current timestamp, the whole
    seconds between t1 and t2, and the value returned by get_git_commit().
    """
    for fname in (out_file1, out_file2):
        # One handle per output file; the context manager closes each one
        # before the next is opened, also when a write raises.
        with open(fname + '.info', 'w') as fh:
            fh.write("$ python %s\n\n" % ' '.join(sys.argv))
            fh.write("source file       =  %s\n" % os.path.abspath(source_file))
            fh.write("target file       =  %s\n" % os.path.abspath(fname))
            fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
            fh.write("seconds elapsed   =  %d\n" % int(t2 - t1))
            # No trailing newline after the commit line, as in the original.
            fh.write("git_commit        =  %s" % get_git_commit())
Ejemplo n.º 3
0
def _write_info(source_file, target_file, feats_file, feats, t1):
    """Write a summary of the run to '<target_file>.info'.

    The summary lists the command line, the absolute paths of source_file
    and target_file, feats_file as given, the sorted keys of the feats
    mapping, the current timestamp, the seconds elapsed since t1 (a
    time.time() value), and the value returned by get_git_commit().
    """
    # The context manager guarantees the handle is flushed and closed, also
    # when one of the writes raises.
    with open(target_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("source file       =  %s\n" % os.path.abspath(source_file))
        fh.write("target file       =  %s\n" % os.path.abspath(target_file))
        fh.write("features file     =  %s\n" % feats_file)
        fh.write("features          =  %s\n" % ' '.join(sorted(feats.keys())))
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        # %d truncates the float difference to whole seconds.
        fh.write("processing time   =  %ds\n" % (time.time() - t1))
        # No trailing newline after the commit line, as in the original.
        fh.write("git_commit        =  %s" % get_git_commit())
def _itrainer_create_info_file(corpus, model, filelist, features, annotation):
    """Record the settings of a training run inside the model directory.

    Writes 'itrain.info.general' under model with the command line, the
    arguments as given and the value returned by get_git_commit(), then
    copies the annotation file and the file list into the same directory
    as 'itrain.info.annotations' and 'itrain.info.files'.
    """
    general_path = os.path.join(model, 'itrain.info.general')
    with open(general_path, 'w') as out:
        out.writelines([
            "$ python %s\n\n" % ' '.join(sys.argv),
            "corpus          =  %s\n" % corpus,
            "file_list       =  %s\n" % filelist,
            "model           =  %s\n" % model,
            "features        =  %s\n" % features,
            # The key is spelled "anotation" in the emitted file; kept as is
            # so the output stays byte-identical.
            "anotation       =  %s\n" % annotation,
            "git_commit      =  %s\n" % get_git_commit(),
        ])
    # Keep copies of the inputs next to the general info file.
    for src, copy_name in ((annotation, 'itrain.info.annotations'),
                           (filelist, 'itrain.info.files')):
        shutil.copyfile(src, os.path.join(model, copy_name))
def create_info_files(corpus, model, filelist, classification):
    """Record the settings of a classification run in its output directory.

    Writes 'iclassify.info.general' under classification with the command
    line, the arguments as given and the value returned by
    get_git_commit(), then copies filelist into the same directory as
    'iclassify.info.files'.
    """
    general_path = os.path.join(classification, 'iclassify.info.general')
    with open(general_path, 'w') as out:
        out.writelines([
            "$ python %s\n\n" % ' '.join(sys.argv),
            "corpus          =  %s\n" % corpus,
            "file_list       =  %s\n" % filelist,
            "model           =  %s\n" % model,
            "classification  =  %s\n" % classification,
            "git_commit      =  %s\n" % get_git_commit(),
        ])
    files_copy = os.path.join(classification, 'iclassify.info.files')
    shutil.copyfile(filelist, files_copy)
Ejemplo n.º 6
0
def add_info_file(corpus_dir, extra_files, added):
    """Append a record of this run to the additions file in CORPUS/config.

    The file name comes from corpus.FNAME_INFO_ADDITIONS. Each record holds
    the command line, a locale-formatted timestamp, extra_files and added
    as given, and the value returned by get_git_commit().

    The file is passed to make_writable() before the append and to
    read_only() afterwards.
    """
    info_file = os.path.join(corpus_dir, 'config', corpus.FNAME_INFO_ADDITIONS)
    make_writable(info_file)
    # The context manager closes the handle even when a write raises, which
    # the previous explicit close() at the end did not.
    with open(info_file, 'a') as fh:
        fh.write("$ %s\n\n" % ' '.join(sys.argv))
        fh.write("timestamp    =  %s\n" % time.strftime("%x %X"))
        fh.write("file_list    =  %s\n" % extra_files)
        fh.write("files_added  =  %s\n" % added)
        # Two blank lines separate this record from the next appended one.
        fh.write("git_commit   =  %s\n\n\n" % get_git_commit())
    read_only(info_file)
def _write_info(source_file, target_file, info_string, threshold, t1, t2):
    """Write a summary of the run to '<target_file>.info'.

    A trailing '.gz' is stripped from target_file first, so both the info
    file name and the reported target path omit that suffix. The summary
    lists the command line, the absolute source and target paths, the
    threshold (formatted as an integer), the current timestamp, the whole
    seconds between t1 and t2, the value returned by get_git_commit(), and
    finally info_string set off by a blank line.
    """
    if target_file.endswith('.gz'):
        target_file = target_file[:-3]
    # The context manager guarantees the handle is flushed and closed, also
    # when one of the writes raises.
    with open(target_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("source file       =  %s\n" % os.path.abspath(source_file))
        fh.write("target file       =  %s\n" % os.path.abspath(target_file))
        fh.write("threshold         =  %d\n" % threshold)
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        fh.write("seconds elapsed   =  %d\n" % int(t2 - t1))
        fh.write("git_commit        =  %s" % get_git_commit())
        fh.write("\n\n" + info_string + "\n")
Ejemplo n.º 8
0
 def _generate_settings(self):
     """Store the command line and the formatted settings on the instance.

     Sets self.command to the invoking command line and self.settings to a
     list of "name  =  value" lines built from the instance attributes
     read below, a locale-formatted timestamp and the value returned by
     get_git_commit().
     """
     invocation = ' '.join(sys.argv)
     self.command = "$ python %s\n\n" % invocation
     lines = []
     lines.append("timestamp    =  %s\n" % time.strftime("%x %X"))
     lines.append("language     =  %s\n" % self.language)
     lines.append("datasource   =  %s\n" % self.datasource)
     lines.append("source_file  =  %s\n" % self.source_file)
     lines.append("source_path  =  %s\n" % self.source_path)
     # The "target_path" entry reports self.location.
     lines.append("target_path  =  %s\n" % self.location)
     lines.append("shuffle      =  %s\n" % str(self.shuffle_file))
     lines.append("git_commit   =  %s\n" % get_git_commit())
     self.settings = lines
 def _create_info_general_file(self):
     """Write the general info file for this run to self.info_file_general.

     The file lists the command line, the absolute paths of the model,
     corpus, file list, annotation file and pipeline config file, the
     names returned by get_features(), the annotation count, the current
     timestamp and the value returned by get_git_commit().
     """
     with open(self.info_file_general, 'w') as out:
         model_path = os.path.abspath(self.model)
         corpus_path = os.path.abspath(self.corpus)
         feature_names = ' '.join(get_features())
         filelist_path = os.path.abspath(self.file_list)
         annotation_path = os.path.abspath(self.annotation_file)
         config_path = os.path.abspath(rconfig.pipeline_config_file)
         out.writelines([
             "$ python %s\n\n" % ' '.join(sys.argv),
             "model             =  %s\n" % model_path,
             "corpus            =  %s\n" % corpus_path,
             "features          =  %s\n" % feature_names,
             "file_list         =  %s\n" % filelist_path,
             "annotation_file   =  %s\n" % annotation_path,
             "annotation_count  =  %s\n" % self.annotation_count,
             "config_file       =  %s\n" % config_path,
             "timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"),
             "git_commit        =  %s\n" % get_git_commit(),
         ])
Ejemplo n.º 10
0
def _write_info(mallet_file, model_file, mtrainer, out_file, stderr_file, t1):
    """Write a summary of a model training run to '<model_file>.info'.

    The summary lists the command line, the absolute paths of mallet_file
    and model_file, mtrainer.settings(), the current timestamp, the seconds
    elapsed since t1 (a time.time() value) and the value returned by
    get_git_commit(). It then records the commands saved on mtrainer,
    followed by indented copies of the contents of out_file and
    stderr_file.
    """
    # All three files are opened with context managers so that every handle
    # is closed, also when a read or write raises.
    with open(model_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("mallet file       =  %s\n" % os.path.abspath(mallet_file))
        fh.write("model file        =  %s\n" % os.path.abspath(model_file))
        fh.write("trainer settings  =  %s\n" % mtrainer.settings())
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        # %d truncates the float difference to whole seconds.
        fh.write("time elapsed      =  %ds\n" % (time.time() - t1))
        fh.write("git_commit        =  %s\n\n" % get_git_commit())
        fh.write("$ %s\n\n" % mtrainer.saved_create_vectors_command)
        fh.write("$ %s\n" % mtrainer.saved_create_model_command)
        fh.write("\nContents of .out file:\n\n")
        with open(out_file) as fh_out:
            for line in fh_out:
                fh.write("    %s" % line)
        fh.write("\nContents of .stderr file:\n\n")
        with open(stderr_file) as fh_err:
            for line in fh_err:
                # Form feeds and carriage returns become indented line
                # breaks so the copied text keeps the four-space indent.
                line = line.replace("\f", "\n    ")
                line = line.replace("\r", "\n    ")
                fh.write("    %s" % line)
        for cmd in mtrainer.saved_create_cinfo_commands:
            fh.write("\n$ %s\n" % cmd)
Ejemplo n.º 11
0
        print fname
        fh_terms = open_input_file(fname)
        count = 0
        for line in fh_terms:
            count += 1
            if count > 100000: break
            if count % 500000 == 0: print '  ', count
            fields = line.split("\t")
            term = fields[0]
            term_count = int(fields[2])
            terms[term] = terms.get(term, 0) + term_count
    return terms


if __name__ == '__main__':

    # Usage: <script> TARGET_DIR PATTERN...
    # The first argument is the output directory; every further argument is
    # a glob pattern that is expanded into the list of result files.
    target_dir = sys.argv[1]
    result_files = []
    for exp in sys.argv[2:]:
        result_files.extend(glob.glob(exp))

    ensure_path(target_dir)
    infofile = target_dir + '/merged_term_frequencies.info.txt'
    # Close the info file before merging so its contents are flushed to
    # disk even if merge_result_files() fails or runs for a long time.
    with codecs.open(infofile, 'w', encoding='utf-8') as fh_info:
        fh_info.write("git commit = %s\n\n" % get_git_commit())
        for fname in result_files:
            fh_info.write(fname + u"\n")

    merge_result_files(target_dir, result_files)