Beispiel #1
0
def create_info_file(classification, language, condense_results):
    """Write the file iclassify.info inside the classification directory.

    The file records the command line that was used, the three arguments
    handed in and the value returned by get_git_commit().
    """
    info_path = os.path.join(classification, 'iclassify.info')
    settings = (
        ("classification        =  %s\n", classification),
        ("language              =  %s\n", language),
        ("condense_results      =  %s\n", condense_results),
    )
    with open(info_path, 'w') as info:
        info.write("$ python %s\n\n" % ' '.join(sys.argv))
        for template, value in settings:
            info.write(template % value)
        # The commit is looked up last, after the settings were written.
        info.write("git_commit            =  %s\n" % get_git_commit())
Beispiel #2
0
def create_info_file(filename, batch, infile, outfile):
    """Write an info file to filename.

    The file records the command line that was used, the batch, the source
    file, the summary file and the value returned by get_git_commit().
    """
    settings = (
        ("batch         =  %s\n", batch),
        ("source file   =  %s\n", infile),
        ("summary file  =  %s\n", outfile),
    )
    with open(filename, 'w') as info:
        info.write("$ python %s\n\n" % ' '.join(sys.argv))
        for template, value in settings:
            info.write(template % value)
        # The commit is looked up last, after the settings were written.
        info.write("git_commit    =  %s\n" % get_git_commit())
Beispiel #3
0
 def _update_info_files(self):
     """Write files with information on the build.

     Appends a record to index.info.general.txt in self.idx_dir. The record
     holds the command line, the index name, the dataset, the value returned
     by get_git_commit() and a timestamp.
     """
     info_path = os.path.join(self.idx_dir, 'index.info.general.txt')
     # Opened in append mode so earlier records are kept. The context
     # manager makes sure the handle is flushed and closed, also when one of
     # the writes raises.
     with open(info_path, 'a') as fh:
         fh.write("$ python %s\n\n" % ' '.join(sys.argv))
         fh.write("index_name   =  %s\n" % self.index_name)
         fh.write("dataset      =  %s\n" % self.dataset)
         fh.write("git_commit   =  %s\n" % get_git_commit())
         fh.write("timestamp    =  %s\n\n" % time.strftime('%Y%m%d-%H%M%S'))
def write_info(info_file, mallet_files, t1):
    """Write an info file listing the mallet files that were used.

    The file written to info_file holds the command line, a timestamp, the
    number of whole seconds elapsed since t1 (a time.time() value), the
    value returned by get_git_commit() and one "source" line for each
    element of mallet_files.
    """
    # The context manager closes the handle even if a write fails; the
    # handle used to be left open.
    with open(info_file, 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        fh.write("seconds elapsed   =  %d\n" % int(time.time() - t1))
        fh.write("git_commit        =  %s\n\n" % get_git_commit())
        for f in mallet_files:
            fh.write("source            =  %s\n" % f)
Beispiel #5
0
def _write_info(source_file, out_file1, out_file2, t1, t2):
    """Write an .info file next to each of the two output files.

    For both out_file1 and out_file2 a file named <output file>.info is
    created. It holds the command line, the absolute paths of the source
    file and of the output file, a timestamp, the whole seconds between t1
    and t2 and the value returned by get_git_commit().
    """
    for fname in (out_file1, out_file2):
        # Each handle is closed before the next file is opened; the handles
        # used to be left open.
        with open(fname + '.info', 'w') as fh:
            fh.write("$ python %s\n\n" % ' '.join(sys.argv))
            fh.write("source file       =  %s\n" % os.path.abspath(source_file))
            fh.write("target file       =  %s\n" % os.path.abspath(fname))
            fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
            fh.write("seconds elapsed   =  %d\n" % int(t2 - t1))
            fh.write("git_commit        =  %s" % get_git_commit())
Beispiel #6
0
def _write_info(source_file, target_file, feats_file, feats, t1):
    """Write the file <target_file>.info describing a processing run.

    The file holds the command line, the absolute paths of the source and
    target files, the features file, the sorted keys of feats joined by
    spaces, a timestamp, the processing time since t1 (a time.time() value)
    and the value returned by get_git_commit().
    """
    # The context manager closes the handle even if a write fails; the
    # handle used to be left open.
    with open(target_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("source file       =  %s\n" % os.path.abspath(source_file))
        fh.write("target file       =  %s\n" % os.path.abspath(target_file))
        fh.write("features file     =  %s\n" % feats_file)
        fh.write("features          =  %s\n" % ' '.join(sorted(feats.keys())))
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        # %d truncates the float difference to whole seconds.
        fh.write("processing time   =  %ds\n" % (time.time() - t1))
        fh.write("git_commit        =  %s" % get_git_commit())
def _itrainer_create_info_file(corpus, model, filelist, features, annotation):
    """Write itrain.info.general to the model directory and copy two files.

    The general info file holds the command line, the arguments handed in
    and the value returned by get_git_commit(). After that the annotation
    file is copied to itrain.info.annotations and the file list to
    itrain.info.files, both inside the model directory.
    """
    general_path = os.path.join(model, 'itrain.info.general')
    # NOTE(review): the key 'anotation' is misspelled; it is kept as is since
    # readers of the info file may rely on the current spelling.
    settings = (
        ("corpus          =  %s\n", corpus),
        ("file_list       =  %s\n", filelist),
        ("model           =  %s\n", model),
        ("features        =  %s\n", features),
        ("anotation       =  %s\n", annotation),
    )
    with open(general_path, 'w') as info:
        info.write("$ python %s\n\n" % ' '.join(sys.argv))
        for template, value in settings:
            info.write(template % value)
        info.write("git_commit      =  %s\n" % get_git_commit())
    annotations_copy = os.path.join(model, 'itrain.info.annotations')
    shutil.copyfile(annotation, annotations_copy)
    files_copy = os.path.join(model, 'itrain.info.files')
    shutil.copyfile(filelist, files_copy)
def create_info_files(corpus, model, filelist, classification):
    """Write iclassify.info.general and copy the file list.

    The general info file is created in the classification directory and
    holds the command line, the arguments handed in and the value returned
    by get_git_commit(). The file list is copied to iclassify.info.files in
    the same directory.
    """
    general_path = os.path.join(classification, 'iclassify.info.general')
    settings = (
        ("corpus          =  %s\n", corpus),
        ("file_list       =  %s\n", filelist),
        ("model           =  %s\n", model),
        ("classification  =  %s\n", classification),
    )
    with open(general_path, 'w') as info:
        info.write("$ python %s\n\n" % ' '.join(sys.argv))
        for template, value in settings:
            info.write(template % value)
        info.write("git_commit      =  %s\n" % get_git_commit())
    files_copy = os.path.join(classification, 'iclassify.info.files')
    shutil.copyfile(filelist, files_copy)
def _write_info(source_file, target_file, info_string, threshold, t1, t2):
    """Write the .info file that goes with target_file.

    A trailing '.gz' is stripped from target_file before '.info' is
    appended, and the stripped name is also the one reported as the target
    file. The file holds the command line, the absolute paths of the source
    and target files, the threshold, a timestamp, the whole seconds between
    t1 and t2, the value returned by get_git_commit() and finally
    info_string.
    """
    if target_file.endswith('.gz'):
        target_file = target_file[:-3]
    # The context manager closes the handle even if a write fails; the
    # handle used to be left open.
    with open(target_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("source file       =  %s\n" % os.path.abspath(source_file))
        fh.write("target file       =  %s\n" % os.path.abspath(target_file))
        fh.write("threshold         =  %d\n" % threshold)
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        fh.write("seconds elapsed   =  %d\n" % int(t2 - t1))
        fh.write("git_commit        =  %s" % get_git_commit())
        fh.write("\n\n" + info_string + "\n")
 def _generate_settings(self):
     """Set self.command and self.settings.

     self.command becomes the command line rendered as a shell-like line,
     self.settings a list of "name  =  value" lines built from a timestamp,
     several instance attributes and the value returned by get_git_commit().
     """
     command_line = ' '.join(sys.argv)
     self.command = "$ python %s\n\n" % command_line
     # Values are listed in the order in which the lines end up in the list.
     fields = (
         ("timestamp    =  %s\n", time.strftime("%x %X")),
         ("language     =  %s\n", self.language),
         ("datasource   =  %s\n", self.datasource),
         ("source_file  =  %s\n", self.source_file),
         ("source_path  =  %s\n", self.source_path),
         ("target_path  =  %s\n", self.target_path),
         ("shuffle      =  %s\n", str(self.shuffle_file)),
         ("git_commit   =  %s\n", get_git_commit()),
     )
     self.settings = [template % value for template, value in fields]
Beispiel #11
0
def add_info_file(corpus_dir, extra_files, added):
    """Append information to the additions info file in CORPUS/config.

    The file name is taken from corpus.FNAME_INFO_ADDITIONS. The appended
    record holds the command line, a timestamp, extra_files, added and the
    value returned by get_git_commit(). make_writable() is called on the
    file before it is opened and read_only() after it has been closed.
    """
    info_file = os.path.join(corpus_dir, 'config', corpus.FNAME_INFO_ADDITIONS)
    make_writable(info_file)
    # The context manager replaces the explicit close(), which was skipped
    # when one of the writes raised.
    with open(info_file, 'a') as fh:
        fh.write("$ %s\n\n" % ' '.join(sys.argv))
        fh.write("timestamp    =  %s\n" % time.strftime("%x %X"))
        fh.write("file_list    =  %s\n" % extra_files)
        fh.write("files_added  =  %s\n" % added)
        fh.write("git_commit   =  %s\n\n\n" % get_git_commit())
    read_only(info_file)
 def _create_info_general_file(self):
     """Write the general info file to self.info_file_general.

     The file holds the command line, several instance attributes, the base
     name of rconfig.pipeline_config_file and the value returned by
     get_git_commit().
     """
     with open(self.info_file_general, 'w') as info:
         emit = info.write
         emit("$ python %s\n\n" % ' '.join(sys.argv))
         emit("model             =  %s\n" % self.model)
         emit("xval              =  %s\n" % self.xval)
         emit("file_list         =  %s\n" % self.file_list)
         emit("annotation_file   =  %s\n" % self.annotation_file)
         emit("annotation_count  =  %s\n" % self.annotation_count)
         # Only the file name of the pipeline configuration is recorded.
         config_name = os.path.basename(rconfig.pipeline_config_file)
         emit("config_file       =  %s\n" % config_name)
         emit("features          =  %s\n" % self.features)
         emit("git_commit        =  %s" % get_git_commit())
 def _create_info_general_file(self):
     """Write the general info file to self.info_file_general.

     The file holds the command line, absolute paths for the model, the
     corpus, the file list, the annotation file and the pipeline config
     file, the features returned by get_features(), the annotation count, a
     timestamp and the value returned by get_git_commit().
     """
     absolute = os.path.abspath
     with open(self.info_file_general, 'w') as info:
         emit = info.write
         emit("$ python %s\n\n" % ' '.join(sys.argv))
         emit("model             =  %s\n" % absolute(self.model))
         emit("corpus            =  %s\n" % absolute(self.corpus))
         emit("features          =  %s\n" % ' '.join(get_features()))
         emit("file_list         =  %s\n" % absolute(self.file_list))
         emit("annotation_file   =  %s\n" % absolute(self.annotation_file))
         emit("annotation_count  =  %s\n" % self.annotation_count)
         config_path = absolute(rconfig.pipeline_config_file)
         emit("config_file       =  %s\n" % config_path)
         stamp = time.strftime("%Y%m%d:%H%M%S")
         emit("timestamp         =  %s\n" % stamp)
         emit("git_commit        =  %s\n" % get_git_commit())
 def _create_info_files(self):
     """Write the general info file and copy the config file and file list.

     A progress message is printed first. The general info file holds the
     command line, the batch, the file list, the model, the features joined
     by spaces, the base name of the pipeline config file and the value
     returned by get_git_commit().
     """
     # NOTE(review): the config file is read through the name rconfig when it
     # is written but through self.rconfig when it is copied - confirm that
     # both refer to the same object.
     print("[--classify] initializing %s directory" % self.batch)
     with open(self.info_file_general, 'w') as info:
         emit = info.write
         emit("$ python %s\n\n" % ' '.join(sys.argv))
         emit("batch        =  %s\n" % self.batch)
         emit("file_list    =  %s\n" % self.file_list)
         emit("model        =  %s\n" % self.model)
         emit("features     =  %s\n" % ' '.join(self.features))
         config_name = os.path.basename(rconfig.pipeline_config_file)
         emit("config_file  =  %s\n" % config_name)
         emit("git_commit   =  %s" % get_git_commit())
     config_source = self.rconfig.pipeline_config_file
     shutil.copyfile(config_source, self.info_file_config)
     shutil.copyfile(self.file_list, self.info_file_filelist)
 def update_state(self, limit, t1):
     """Update the content of state/processed.txt and state/processing-history.txt.

     The value of self.files_processed is handed to create_file() for
     state/processed.txt. One tab-separated line is appended to
     state/processing-history.txt with the stage name, limit, a timestamp,
     the value returned by get_git_commit() and the seconds elapsed since t1
     (a time.time() value).
     """
     # TODO: should not just print the files processed in the history, but also the
     # range of files.
     time_elapsed = time.time() - t1
     processed = "%d\n" % self.files_processed
     create_file(os.path.join(self.path, 'state', 'processed.txt'),
                 processed)
     history_file = os.path.join(self.path, 'state',
                                 'processing-history.txt')
     # The context manager flushes and closes the history file; the handle
     # used to be left open.
     with open(history_file, 'a') as fh:
         fh.write("%s\t%d\t%s\t%s\t%s\n" %
                  (self.stage_name, limit, time.strftime("%Y:%m:%d-%H:%M:%S"),
                   get_git_commit(), time_elapsed))
Beispiel #16
0
def write_info(rconfig, dirname, filelist):
    """Write a general info file to dirname and copy the file list there.

    ensure_path() is called on dirname first. The general info file is named
    annotate.info.general.txt and holds the command line, the absolute paths
    of the corpus and the file list, the base name of the pipeline config
    file and the value returned by get_git_commit(). The file list is copied
    to annotate.info.filelist.txt.
    """
    print("Writing general info...")
    ensure_path(dirname)
    general_path = os.path.join(dirname, 'annotate.info.general.txt')
    with open(general_path, 'w') as info:
        emit = info.write
        emit("$ python %s\n\n" % ' '.join(sys.argv))
        emit("corpus            =  %s\n" % os.path.abspath(rconfig.corpus))
        emit("file_list         =  %s\n" % os.path.abspath(filelist))
        config_name = os.path.basename(rconfig.pipeline_config_file)
        emit("config_file       =  %s\n" % config_name)
        emit("git_commit        =  %s" % get_git_commit())
    print("Copying %s..." % filelist)
    filelist_copy = os.path.join(dirname, 'annotate.info.filelist.txt')
    shutil.copyfile(filelist, filelist_copy)
Beispiel #17
0
 def _create_info_files(self):
     """Initialize the batch directory and write its info files.

     Exits through sys.exit() with a warning when the general info file
     already exists. Otherwise a progress message is printed, ensure_path()
     is called on self.batch_dir, the general info file is written and the
     pipeline config file and the file list are copied.
     """
     # NOTE(review): the config file is read through the name rconfig when it
     # is written but through self.rconfig when it is copied - confirm that
     # both refer to the same object.
     already_ran = os.path.exists(self.info_file_general)
     if already_ran:
         sys.exit("WARNING: already ran indexer for batch %s" % self.batch)
     print("[Collector] initializing data/o1_index/%s directory" % self.batch)
     ensure_path(self.batch_dir)
     with open(self.info_file_general, 'w') as info:
         emit = info.write
         emit("$ python %s\n\n" % ' '.join(sys.argv))
         emit("batch        =  %s\n" % self.batch)
         emit("file_list    =  %s\n" % self.file_list)
         config_name = os.path.basename(rconfig.pipeline_config_file)
         emit("config_file  =  %s\n" % config_name)
         emit("git_commit   =  %s\n" % get_git_commit())
     config_source = self.rconfig.pipeline_config_file
     shutil.copyfile(config_source, self.info_file_config)
     shutil.copyfile(self.file_list, self.info_file_filelist)
 def _create_info_files(self, t1):
     """Write the general info file and copy the config file and file list.

     The general info file holds the command line, the absolute path of
     self.output, the file list, the model, the features joined by spaces,
     the pipeline config file, a timestamp, the whole seconds elapsed since
     t1 (a time.time() value) and the value returned by get_git_commit().
     The config file is only copied when it is not None.
     """
     # NOTE(review): the config file is read through the name rconfig when it
     # is written but through self.rconfig when it is copied - confirm that
     # both refer to the same object.
     with open(self.info_file_general, 'w') as info:
         emit = info.write
         emit("$ python %s\n\n" % ' '.join(sys.argv))
         emit("output           =  %s\n" % os.path.abspath(self.output))
         emit("file_list        =  %s\n" % self.file_list)
         emit("model            =  %s\n" % self.model)
         emit("features         =  %s\n" % ' '.join(self.features))
         emit("config_file      =  %s\n" % rconfig.pipeline_config_file)
         stamp = time.strftime("%Y%m%d:%H%M%S")
         emit("timestamp        =  %s\n" % stamp)
         elapsed = int(time.time() - t1)
         emit("processing time  =  %ds\n" % elapsed)
         emit("git_commit       =  %s" % get_git_commit())
     if not (self.rconfig.pipeline_config_file is None):
         shutil.copyfile(self.rconfig.pipeline_config_file,
                         self.info_file_config)
     shutil.copyfile(self.file_list, self.info_file_filelist)
Beispiel #19
0
 def _create_info_files(self):
     """Initialize the output directory and write its info files.

     Exits through sys.exit() with a warning when the general info file
     already exists. Otherwise a progress message is printed, ensure_path()
     is called on self.output_dir, the general info file is written, the
     pipeline config file is copied when it is not None and the file list is
     copied.
     """
     # NOTE(review): the config file is read through the name rconfig when it
     # is written but through self.rconfig when it is copied - confirm that
     # both refer to the same object.
     already_ran = os.path.exists(self.info_file_general)
     if already_ran:
         sys.exit("WARNING: already have matcher results in %s" %
                  self.output)
     print("[Matcher] initializing data/o2_matcher/%s directory" % self.output)
     ensure_path(self.output_dir)
     with open(self.info_file_general, 'w') as info:
         emit = info.write
         emit("$ python %s\n\n" % ' '.join(sys.argv))
         emit("output       =  %s\n" % self.output)
         emit("file_list    =  %s\n" % self.file_list)
         # The config file is recorded as given, not reduced to its base
         # name, so a missing config file shows up as None.
         emit("config_file  =  %s\n" % rconfig.pipeline_config_file)
         emit("git_commit   =  %s\n" % get_git_commit())
     if not (self.rconfig.pipeline_config_file is None):
         shutil.copyfile(self.rconfig.pipeline_config_file,
                         self.info_file_config)
     shutil.copyfile(self.file_list, self.info_file_filelist)
Beispiel #20
0
def _write_info(mallet_file, model_file, mtrainer, out_file, stderr_file, t1):
    """Write the file <model_file>.info describing a training run.

    The file holds the command line, the absolute paths of the mallet and
    model files, the result of mtrainer.settings(), a timestamp, the time
    elapsed since t1 (a time.time() value), the value returned by
    get_git_commit(), the commands saved on mtrainer and the indented
    contents of out_file and stderr_file.
    """
    # All three files are handled by context managers; the output handle and
    # the two input handles used to be left open.
    with open(model_file + '.info', 'w') as fh:
        fh.write("$ python %s\n\n" % ' '.join(sys.argv))
        fh.write("mallet file       =  %s\n" % os.path.abspath(mallet_file))
        fh.write("model file        =  %s\n" % os.path.abspath(model_file))
        fh.write("trainer settings  =  %s\n" % mtrainer.settings())
        fh.write("timestamp         =  %s\n" % time.strftime("%Y%m%d:%H%M%S"))
        # %d truncates the float difference to whole seconds.
        fh.write("time elapsed      =  %ds\n" % (time.time() - t1))
        fh.write("git_commit        =  %s\n\n" % get_git_commit())
        fh.write("$ %s\n\n" % mtrainer.saved_create_vectors_command)
        fh.write("$ %s\n" % mtrainer.saved_create_model_command)
        fh.write("\nContents of .out file:\n\n")
        with open(out_file) as fh_out:
            for line in fh_out:
                fh.write("    %s" % line)
        fh.write("\nContents of .stderr file:\n\n")
        with open(stderr_file) as fh_err:
            for line in fh_err:
                # Form feeds and carriage returns become indented line
                # breaks so the copied text stays aligned.
                line = line.replace("\f", "\n    ")
                line = line.replace("\r", "\n    ")
                fh.write("    %s" % line)
        for cmd in mtrainer.saved_create_cinfo_commands:
            fh.write("\n$ %s\n" % cmd)
Beispiel #21
0
        print fname
        fh_terms = open_input_file(fname)
        count = 0
        for line in fh_terms:
            count += 1
            if count > 100000: break
            if count % 500000 == 0: print '  ', count
            fields = line.split("\t")
            term = fields[0]
            term_count = int(fields[2])
            terms[term] = terms.get(term, 0) + term_count
    return terms


if __name__ == '__main__':

    # The first argument is the target directory, all following arguments
    # are glob expressions selecting the result files to merge.
    target_dir = sys.argv[1]
    result_files = []
    for exp in sys.argv[2:]:
        result_files.extend(glob.glob(exp))

    ensure_path(target_dir)
    infofile = target_dir + '/merged_term_frequencies.info.txt'
    # The context manager flushes and closes the info file before the merge
    # starts; the handle used to be left open until the interpreter exited.
    with codecs.open(infofile, 'w', encoding='utf-8') as fh_info:
        fh_info.write("git commit = %s\n\n" % get_git_commit())
        for fname in result_files:
            fh_info.write(fname + u"\n")

    merge_result_files(target_dir, result_files)