def main(argv): if len(argv) == 0: # print 'Usage: my_program command --option <argument>' print(__doc__) else: args = docopt(__doc__, argv=argv) if args['aggregate_labels']: with tempfile.TemporaryDirectory() as tmpdir: scrapping.scrap(args['<bundles_dir>'], tmpdir) notcut_trees = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_notcut_trees.txt' notcut_priors = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_notcut_priors.csv' cut_trees = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_cut_trees.txt' split_trees = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_split_trees.txt' withdummy_trees = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_withdummy_trees.txt' cut_priors = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_cut_priors.csv' split_priors = args['<out_dir>'] + '/' + args[ '<prefix>'] + '_split_priors.csv' out_df = args['<out_dir>'] + '/' + args['<prefix>'] + '_df.csv' lt.aggregate_labels_from_bundles_to_trees( args['<all_trees.txt>'], tmpdir, notcut_trees) tt.remove_duplicate_nodes(notcut_trees, notcut_trees) print('Duplicate nodes were removed.') tt.translate_list_of_trees(notcut_trees, notcut_trees) print('Trees were translated.') lt.remove_2nd_tags(notcut_trees) print('2nd tags were removed.') lt.print_label_priors(notcut_trees, notcut_priors) lt.cut_non_labeled_branches(notcut_trees, cut_trees) print('Non-labeled branches were cut.') lt.apply_split_labels_to_trees(cut_trees, split_trees) lt.print_label_priors(split_trees, split_priors) print('Split labels were applied.') lt.apply_start_split_end_labels_to_trees( cut_trees, withdummy_trees) print('Start-Split-End labels were applied.') lt.print_label_priors(cut_trees, cut_priors) lt.create_data_csv(split_trees, out_df) print('Labels aggregated successfully.') if args['rename_labels']: lt.rename_labels(args['<in_file>'], args['<out_file>']) if args['rename_tags_in_trees']: lt.rename_tags_in_trees(args['<trees.txt>'], args['<out_trees.txt>']) if args['rename_tags_in_df']: lt.rename_tags_in_df(args['<df.csv>'], args['<out_df.csv>']) if args['create_bundles']: tt.create_bundles(args['<all_trees.txt>'], args['<tree_ids>'], args['<out_dir>']) if args['rework_labels']: lt.rework_labels(args['<labeled_trees.txt>'], args['<rework_settings.txt>'], args['<out_trees.txt>']) if args['create_df']: lt.create_data_csv(args['<labeled_trees.txt>'], args['<out_df.csv>'], args['--rework_settings'], args['--ignore_deleted']) if args['print']: if args['dis_branch']: dst.print_branch(trees_path=args['<trees.txt>'], probas_path=args['<probas.dispr>'], branch_atlas_id=args['<branch_atlas_id>'], out_file=args['<out_file.csv>']) if args['dis_tags_npmi']: dst.print_dis_tags_npmi(trees_path=args['<trees.txt>'], probas_path=args['<probas.dispr>'], out_file=args['<out.csv>'], just_count=args['--just_count'], just_pmi=args['--just_pmi']) if args['tags_npmi']: lt.print_tags_npmi_table(args['<labeled_trees.txt>'], args['<out.csv>'], args['--just_count'], args['--log']) if args['label_priors']: lt.print_label_priors(args['<labeled_trees.txt>'], args['<out.csv>'], args['--per_tree']) if args['label_details']: lt.print_label_details(args['<labeled_trees.txt>'], args['<out.csv>']) if args['label_cooc_lists']: lt.print_label_cooc_lists(args['<labeled_trees.txt>'], args['<out.csv>']) if args['label_passes']: lt.print_label_passes(args['<labeled_trees.txt>'], args['<out_dir>']) if args['label_ngrams']: lt.print_label_ngrams( args['<labeled_trees.txt>'], args['<out_dir>'], [int(n) for n in args['<n1,n2,n3>'].split(',')]) if args['forward_backward_transitions']: lt.print_forward_backward_transitions( args['<labeled_trees.txt>'], args['<out_dir>'], [int(n) for n in args['<n1,n2,n3>'].split(',')]) if args['label_ngram_lists']: lt.print_label_ngram_lists( args['<labeled_trees.txt>'], args['<out_file>'], [int(n) for n in args['<n1,n2,n3>'].split(',')], args['<min_count>']) if args['trees_statistics']: tt.create_list_of_trees_statistics(args['<trees.txt>'], args['<stats.csv>']) if args['label_stats']: trees_path = args['<trees.txt>'] out_dir = args['<stats_dir>'] priors_csv = out_dir + '/priors.csv' npmi = out_dir + '/npmi.csv' correlation_log = out_dir + '/corr_log.txt' pmi = out_dir + '/pmi.csv' matthews = out_dir + '/matthews_correlation.csv' together_counts = out_dir + '/together_counts.csv' general_stats = out_dir + '/general_stats.csv' lt.print_label_ngrams(trees_path, out_dir, [2, 3, 4, 5, 6, 7]) lt.print_label_priors(trees_path, priors_csv) lt.print_tags_npmi_table(trees_path, npmi, log_file=correlation_log) lt.print_tags_npmi_table(trees_path, pmi, just_pmi=True) lt.print_tags_npmi_table(trees_path, together_counts, just_count=True) tt.create_list_of_trees_statistics(trees_path, general_stats) lt.print_tags_matthews(trees_path, matthews) if args['grid_search']: cv = None if args['--cv']: cv = int(args['--cv']) target_tags = None if args['--target_tags']: target_tags = args['--target_tags'].split(',') split_tags = None if args['--split_tags']: split_tags = [int(i) for i in args['--split_tags'].split(',')] adders = args['--adders'] ct.grid_search(args['<data_prefix>'], args['<pipe_prefix>'], args['<params_prefix>'], adders=adders, cv=cv, split_tags=split_tags, target_tags=target_tags) if args['train_test']: target_tags = None if args['--target_tags']: target_tags = args['--target_tags'].split(',') adders = args['--adders'] cls = args['--cls'] ct.train_test(args['<train>'], args['<test>'], args['<pipe>'], args['<params>'], args['<mode>'], classifier=cls, target_tags=target_tags, adders=adders) if args['aggregate_train_test']: target_tags = None if args['--target_tags']: target_tags = args['--target_tags'].split(',') adders = args['--adders'] cls = args['--cls'] ct.aggregate_traintest(args['<train>'], args['<test>'], args['<pipe>'], args['<params>'], args['<mode>'], classifier=cls, target_tags=target_tags, adders=adders) if args['lstmsa']: ct.lstm_sa(data_prefix=args['<data_prefix>'], target_tags=args['--target_tags'], pipes=args['<pipes>'], hidden_dims=args['<hidden_dims>']) if args['aggregate_scores']: ct.aggregate_scores(args['<data_prefix>'], args['<pipe_prefix>']) if args['aggregate_scores_lstmsa']: ct.aggregate_scores_lstmsa(args['<data_prefix>'], args['--tags']) if args['create_doc2vec_train_file']: ft.create_d2vtrain_lines(args['<trees.txt>'], args['<out_file.txt>']) if args['train_doc2vec']: ft.train_doc2vec(args['<train_file.txt>'], args['<out_model.txt>'], int(args['<epochs>']), int(args['<dim>'])) if args['prefit_adders']: ft.prefit_adders(args['<data_prefix>'], args['--adders']) if args['prepare_dissent']: dst.run_trees(args['<trees.txt>'], args['<out_path>']) if args['merge_disprobas']: dst.merge_probas(probas_dir=args['<probas_dir>'], out_path=args['<out_file.dispr>'])