def extend_write_eds(fig2parents, e2types, outfile, e2freq):
    """Extend each entity's types with ``addhighleveltypes`` and write the result.

    Args:
        fig2parents: Passed unchanged to ``addhighleveltypes`` alongside each
            entity's types.
        e2types: Mapping whose keys are iterated and whose values are handed
            to ``addhighleveltypes``.
        outfile: Forwarded to ``write_ds`` as its output argument.
        e2freq: Forwarded to ``write_ds``.

    Returns:
        A new dict with the same keys as ``e2types``; each value is whatever
        ``addhighleveltypes`` returned for that key's types.
    """
    extended_by_entity = {
        entity: addhighleveltypes(e2types[entity], fig2parents)
        for entity in e2types
    }
    write_ds(extended_by_entity, e2freq, outfile)
    return extended_by_entity
# NOTE(review): the statements up to the ``__main__`` guard read like the body
# of a helper (presumably ``write_lines``, which the script section calls)
# whose ``def`` line is not visible in this part of the file. They use
# ``outdir`` and ``lines`` before the script section assigns ``outdir`` --
# confirm where these statements belong.
ds_lines_path = outdir + '/ds_lines_figer'
# ``with`` guarantees the handle is closed even if a write raises.
with open(ds_lines_path, 'w') as f:
    # Log the path that was actually opened (the previous message omitted
    # the '/' separator and so reported a different path).
    logger.info('write test lines in %s', ds_lines_path)
    for myline in lines:
        # One stripped line per output row.
        f.write(myline.strip() + '\n')


if __name__ == '__main__':
    # Script entry point.
    # Usage: <script> <dsfile> <ds_linesfile> <outdir>
    import os  # local import: used only for joining the output path below

    # Fail fast with a usage message instead of an IndexError raised only
    # after the resource files have been loaded.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <dsfile> <ds_linesfile> <outdir>' % sys.argv[0])

    # NOTE(review): resource locations are hard-coded absolute paths.
    fbname2figer = laod_figermapping(
        '/nfs/data3/yadollah/nlptools_resources/figer/config/types.map')
    fig2parents = load_parents(
        '/nfs/data3/yadollah/nlptools_resources/figer/config/yy_type2parents')
    mid2name = load_type_names(
        '/nfs/data1/proj/yadollah/cluewebwork/nlu/dataForImport/type.name')
    logger.info('size types with names: %d', len(mid2name))

    dsfile = sys.argv[1]
    ds_linesfile = sys.argv[2]
    outdir = sys.argv[3]

    (e2types, t2ents, e2freq) = load_dataset(dsfile, logger)
    logger.info(len(e2types))

    newConvertedDs = filter_write_ds(e2types, fbname2figer, mid2name, outdir,
                                     fig2parents)
    logger.info('size of dataset after filtering to figer type: %d',
                len(newConvertedDs))

    # ``e2freq`` is rebound here to the value returned by ``filter_ds_lines``;
    # both writes below use that value.
    (new_dslines, e2freq) = filter_ds_lines(ds_linesfile, newConvertedDs,
                                            outdir)
    logger.info('#lines after filtering to figer type: %d', len(new_dslines))

    # Join with a path separator so the file lands inside ``outdir`` whether
    # or not the argument ends with '/'; plain concatenation produced
    # '<outdir>Eds_figer' when it did not.
    write_ds(newConvertedDs, e2freq, os.path.join(outdir, 'Eds_figer'))
    write_lines(new_dslines, e2freq, outdir)