Example #1
def do_test_gold_graph(opt):
    """

    :param opt:
    :return:
    """
    instances = preprocess(opt.amr_file,
                           start_corenlp=False,
                           input_format=opt.amrfmt,
                           prp_format=opt.prpfmt)
    gold_amr = []
    for inst in instances:
        GraphState.sent = inst.tokens
        gold_amr.append(GraphState.get_parsed_amr(inst.gold_graph))
    write_parsed_amr(gold_amr, instances, opt.amr_file, 'abt.gold')
    print "Done output AMR!"
Example #2
    def parse_corpus_test(self, instances, EVAL=False):
        start_time = time.time()
        parsed_amr = []
        span_graph_pairs = []

        if EVAL:
            Parser.cm = np.zeros(shape=(len(GraphState.action_table), len(GraphState.action_table)))
            Parser.rtx = []
            Parser.rty = []
            Parser.steps = []

            n_correct_labeled_total = 0.0
            n_correct_total = 0.0
            n_parsed_total = 0.0
            n_gold_total = 0.0

            n_correct_tag_total = 0.0
            n_parsed_tag_total = 0.0
            brackets = defaultdict(set)
            results = []
            n_gold_tag_total = 0.0
            # cm_total = np.zeros(shape=(len(GraphState.action_table),len(GraphState.action_table)))
            # if WRITE_FAKE_AMR: out_fake_amr = open('data/fake_amr_triples.txt','w')

            for i, inst in enumerate(instances, 1):
                per_start_time = time.time()
                step, state = self.parse(inst, train=False)
                per_parse_time = round(time.time() - per_start_time, 3)

                Parser.rtx.append(len(inst.tokens))
                Parser.rty.append(per_parse_time)
                Parser.steps.append(step)

                n_correct_labeled_arc, n_correct_arc, n_parsed_arc, n_gold_arc, n_correct_tag, n_parsed_tag, n_gold_tag = (
                    state.evaluate()
                )

                p = n_correct_arc / n_parsed_arc if n_parsed_arc else 0.0
                r = n_correct_arc / n_gold_arc if n_gold_arc else 0.0
                f = 2 * p * r / (p + r) if p + r != 0.0 else 0.0
                """
                results.append(f)

                if f <= 0.4 and f >= .0:
                    brackets['0-40'].add(inst.sentID)
                elif f <= 0.6 and f > 0.4:
                    brackets['40-60'].add(inst.sentID)
                else:
                    brackets['60-100'].add(inst.sentID)
                """
                n_correct_labeled_total += n_correct_labeled_arc
                n_correct_total += n_correct_arc
                n_parsed_total += n_parsed_arc
                n_gold_total += n_gold_arc

                n_correct_tag_total += n_correct_tag
                n_parsed_tag_total += n_parsed_tag
                n_gold_tag_total += n_gold_tag

                p1 = n_correct_arc / n_parsed_arc if n_parsed_arc != 0.0 else 0.0
                r1 = n_correct_arc / n_gold_arc
                f1 = 2 * p1 * r1 / (p1 + r1) if p1 + r1 != 0.0 else 0.0

                lp1 = n_correct_labeled_arc / n_parsed_arc if n_parsed_arc != 0.0 else 0.0
                lr1 = n_correct_labeled_arc / n_gold_arc
                lf1 = 2 * lp1 * lr1 / (lp1 + lr1) if lp1 + lr1 != 0.0 else 0.0

                tp1 = n_correct_tag / n_parsed_tag if n_parsed_tag != 0.0 else 0.0
                tr1 = n_correct_tag / n_gold_tag if n_gold_tag != 0.0 else 0.0

                score = (p1, r1, f1, lp1, lr1, lf1, tp1, tr1)
                ##########################
                # gold edge labeled amr; gold tag labeled amr ;for comparison
                # garc_graph = state.get_gold_edge_graph()
                # parsed_amr.append(GraphState.get_parsed_amr(garc_graph))
                #
                # gtag_graph = state.get_gold_tag_graph()
                # parsed_amr.append(GraphState.get_parsed_amr(gtag_graph))

                # g_graph = state.get_gold_label_graph()
                # parsed_amr.append(GraphState.get_parsed_amr(g_graph))
                ############################

                parsed_amr.append(GraphState.get_parsed_amr(state.A))
                span_graph_pairs.append((state.A, state.gold_graph, score))
                print >> self.elog, "Done parsing sentence %s" % (state.sentID)

            print >> self.elog, "Parsing on %s instances takes %s" % (
                str(i),
                datetime.timedelta(seconds=round(time.time() - start_time, 0)),
            )
            p = n_correct_total / n_parsed_total if n_parsed_total != 0.0 else 0.0
            r = n_correct_total / n_gold_total
            f = 2 * p * r / (p + r)
            print >> self.elog, "Unlabeled Precision:%s Recall:%s F1:%s" % (p, r, f)

            lp = n_correct_labeled_total / n_parsed_total
            lr = n_correct_labeled_total / n_gold_total
            lf = 2 * lp * lr / (lp + lr)
            print >> self.elog, "Labeled Precision:%s Recall:%s F1:%s" % (lp, lr, lf)

            tp = n_correct_tag_total / n_parsed_tag_total
            tr = n_correct_tag_total / n_gold_tag_total
            print >> self.elog, "Tagging Precision:%s Recall:%s" % (tp, tr)

            # pickle.dump((Parser.rtx,Parser.rty,Parser.steps),open('draw-graph/rt.pkl','wb'))
            # plt.plot(Parser.rtx,Parser.rty,'o')
            # plt.savefig('draw-graph/rt.png')
            # plt.plot(Parser.rtx,Parser.steps,'o')
            # plt.xlabel('Sentence length')
            # plt.ylabel('Actions')
            # plt.savefig('draw-graph/rt-act.png')

            print "Confusion matrix action class:"
            np.set_printoptions(suppress=True)
            print np.round(np.divide(Parser.cm, 10))

            ##############################
            # import random
            # print random.sample(brackets['0-40'],10)
            # print random.sample(brackets['40-60'],10)
            # print random.sample(brackets['60-100'],10)

            # return results
        else:

            for i, inst in enumerate(instances, 1):
                per_start_time = time.time()
                step, state = self.parse(inst, train=False)
                per_parse_time = round(time.time() - per_start_time, 3)

                parsed_amr.append(GraphState.get_parsed_amr(state.A))
                print >> self.elog, "Done parsing sentence %s" % (state.sentID)

            print >> self.elog, "Parsing on %s instances takes %s" % (
                str(i),
                datetime.timedelta(seconds=round(time.time() - start_time, 0)),
            )

        return span_graph_pairs, parsed_amr
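
Both the per-sentence and the corpus-level scores above repeat the same precision/recall/F1 arithmetic with zero-guards. A minimal standalone sketch of that pattern (an illustration only, not a helper from the parser):

def prf(n_correct, n_parsed, n_gold):
    """Precision / recall / F1 with the same zero-guards used above."""
    p = n_correct / n_parsed if n_parsed else 0.0
    r = n_correct / n_gold if n_gold else 0.0
    f = 2 * p * r / (p + r) if p + r else 0.0
    return p, r, f

# e.g. prf(8.0, 10.0, 12.0) -> (0.8, 0.666..., 0.727...)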
Example #3
    def parse_corpus_test(self, instances, EVAL=False):
        start_time = time.time()
        parsed_amr = []
        span_graph_pairs = []

        if EVAL:
            Parser.cm = np.zeros(shape=(len(GraphState.action_table),len(GraphState.action_table)))
            Parser.rtx = []
            Parser.rty = []
            Parser.steps = []

            n_correct_labeled_total = .0
            n_correct_total = .0
            n_parsed_total = .0
            n_gold_total = .0

            n_correct_tag_total = .0
            n_parsed_tag_total = .0
            brackets = defaultdict(set)
            results = []
            n_gold_tag_total = .0
            #cm_total = np.zeros(shape=(len(GraphState.action_table),len(GraphState.action_table)))
            #if WRITE_FAKE_AMR: out_fake_amr = open('data/fake_amr_triples.txt','w')

            for i,inst in enumerate(instances,1):
                per_start_time = time.time()
                step,state = self.parse(inst,train=False)
                per_parse_time = round(time.time()-per_start_time,3)

                Parser.rtx.append(len(inst.tokens))
                Parser.rty.append(per_parse_time)
                Parser.steps.append(step)

                n_correct_labeled_arc,n_correct_arc,n_parsed_arc,n_gold_arc,n_correct_tag,n_parsed_tag,n_gold_tag = state.evaluate()

                p = n_correct_arc/n_parsed_arc if n_parsed_arc else .0
                r = n_correct_arc/n_gold_arc if n_gold_arc else .0
                f = 2*p*r/(p+r) if p+r != .0 else .0
                '''
                results.append(f)

                if f <= 0.4 and f >= .0:
                    brackets['0-40'].add(inst.sentID)
                elif f <= 0.6 and f > 0.4:
                    brackets['40-60'].add(inst.sentID)
                else:
                    brackets['60-100'].add(inst.sentID)
                '''
                n_correct_labeled_total += n_correct_labeled_arc
                n_correct_total += n_correct_arc
                n_parsed_total += n_parsed_arc
                n_gold_total += n_gold_arc

                n_correct_tag_total +=  n_correct_tag
                n_parsed_tag_total +=  n_parsed_tag
                n_gold_tag_total += n_gold_tag

                p1 = n_correct_arc/n_parsed_arc if n_parsed_arc != .0 else .0
                r1 = n_correct_arc/n_gold_arc
                f1 = 2*p1*r1/(p1+r1) if p1+r1 != .0 else .0

                lp1 = n_correct_labeled_arc/n_parsed_arc if n_parsed_arc != .0 else .0
                lr1 = n_correct_labeled_arc/n_gold_arc
                lf1 = 2*lp1*lr1/(lp1+lr1) if lp1+lr1 != .0 else .0

                tp1 = n_correct_tag/n_parsed_tag if n_parsed_tag != .0 else .0
                tr1 = n_correct_tag/n_gold_tag if n_gold_tag != .0 else .0

                score = (p1,r1,f1,lp1,lr1,lf1,tp1,tr1)
                ##########################
                #gold edge labeled amr; gold tag labeled amr ;for comparison
                #garc_graph = state.get_gold_edge_graph()
                #parsed_amr.append(GraphState.get_parsed_amr(garc_graph))
                #
                #gtag_graph = state.get_gold_tag_graph()
                #parsed_amr.append(GraphState.get_parsed_amr(gtag_graph))

                #g_graph = state.get_gold_label_graph()
                #parsed_amr.append(GraphState.get_parsed_amr(g_graph))
                ############################


                parsed_amr.append(GraphState.get_parsed_amr(state.A))
                span_graph_pairs.append((state.A,state.gold_graph,score))
                print >> self.elog, "Done parsing sentence %s" % (state.sentID)

            print >> self.elog,"Parsing on %s instances takes %s" % (str(i),datetime.timedelta(seconds=round(time.time()-start_time,0)))
            p = n_correct_total/n_parsed_total if n_parsed_total != .0 else .0
            r = n_correct_total/n_gold_total
            f = 2*p*r/(p+r)
            print >> self.elog,"Unlabeled Precision:%s Recall:%s F1:%s" % (p,r,f)

            lp = n_correct_labeled_total/n_parsed_total
            lr = n_correct_labeled_total/n_gold_total
            lf = 2*lp*lr/(lp+lr)
            print >> self.elog,"Labeled Precision:%s Recall:%s F1:%s" % (lp,lr,lf)

            tp = n_correct_tag_total/n_parsed_tag_total
            tr = n_correct_tag_total/n_gold_tag_total
            print >> self.elog,"Tagging Precision:%s Recall:%s" % (tp,tr)


            #pickle.dump((Parser.rtx,Parser.rty,Parser.steps),open('draw-graph/rt.pkl','wb'))
            #plt.plot(Parser.rtx,Parser.rty,'o')
            #plt.savefig('draw-graph/rt.png')
            #plt.plot(Parser.rtx,Parser.steps,'o')
            #plt.xlabel('Sentence length')
            #plt.ylabel('Actions')
            #plt.savefig('draw-graph/rt-act.png')

            print "Confusion matrix action class:"
            np.set_printoptions(suppress=True)
            print np.round(np.divide(Parser.cm,10))


            ##############################
            #import random
            #print random.sample(brackets['0-40'],10)
            #print random.sample(brackets['40-60'],10)
            #print random.sample(brackets['60-100'],10)

            #return results
        else:
            print('Look Here Matt')
            print('Before Results = ' + str(parsed_amr))
            for i,inst in enumerate(instances,1):
                per_start_time = time.time()
                step,state = self.parse(inst,train=False)
                print('step = ' + str(step))
                print('state = ' + str(state))
                per_parse_time = round(time.time()-per_start_time,3)

                parsed_amr.append(GraphState.get_parsed_amr(state.A))
                if self.verbose > 1: print >> self.elog, "Done parsing sentence %s" % (state.sentID)

            print >> self.elog,"Parsing on %s instances takes %s" % (str(i),datetime.timedelta(seconds=round(time.time()-start_time,0)))

        print('Results = ' + str(parsed_amr))
        return span_graph_pairs, parsed_amr
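
The commented-out block in the evaluation branch hints at how the collected Parser.rtx, Parser.rty and Parser.steps lists could be plotted. A self-contained sketch with made-up numbers, assuming matplotlib is installed:

import matplotlib
matplotlib.use('Agg')            # write image files, no display required
import matplotlib.pyplot as plt

rtx = [5, 12, 20, 31]            # sentence lengths in tokens (placeholder data)
rty = [0.02, 0.08, 0.21, 0.55]   # per-sentence parse time in seconds (placeholder data)
steps = [11, 27, 45, 70]         # number of transition actions (placeholder data)

plt.plot(rtx, rty, 'o')
plt.xlabel('Sentence length')
plt.ylabel('Seconds')
plt.savefig('rt.png')

plt.figure()
plt.plot(rtx, steps, 'o')
plt.xlabel('Sentence length')
plt.ylabel('Actions')
plt.savefig('rt-act.png')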
Example #4
def main():
    arg_parser = argparse.ArgumentParser(
        description="Brandeis transition-based AMR parser 1.0")
    build_opts(arg_parser)

    args = arg_parser.parse_args()

    amr_file = args.amr_file
    instances = None
    train_instance = None
    constants.FLAG_COREF = args.coref
    constants.FLAG_PROP = args.prop
    constants.FLAG_RNE = args.rne
    constants.FLAG_VERB = args.verblist
    constants.FLAG_ONTO = args.onto
    constants.FLAG_DEPPARSER = args.depparser

    if args.mode == 'preprocess':
        # using corenlp to preprocess the sentences
        do_preproces(args)

    elif args.mode == 'test_gold_graph':
        # preprocess the JAMR aligned amr
        do_test_gold_graph(args)

    elif args.mode == 'align':
        # do alignment
        if args.input_file:
            instances = pickle.load(open(args.input_file, 'rb'))
        else:
            raise ValueError(
                "Missing data file! specify it using --input or using preprocessing!"
            )
        gold_instances_file = args.input_file.split('.')[0] + '_gold.p'

        print >> log, "Doing alignment..."

        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log', 'w')

        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin
        counter = 1
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt + '\n'
                print >> log, "AMR:"
                print >> log, amr.to_amr_string()

            alresult = amr_aligner.apply_align(snt, amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr, alresult)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                print >> log, amr_aligner.print_align_result(alresult, amr)
            counter += 1

        pickle.dump(instances, open(gold_instances_file, 'wb'),
                    pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close()
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()

    elif args.mode == 'userGuide':
        # test user guide actions
        print 'Read in training instances...'
        train_instances = preprocess(amr_file, False)

        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])

        sys.exit()

    elif args.mode == 'oracleGuide':
        # test deterministic oracle
        train_instances = preprocess(amr_file,
                                     start_corenlp=False,
                                     input_format=args.amrfmt,
                                     prp_format=args.prpfmt)
        try:
            hand_alignments = load_hand_alignments(amr_file +
                                                   str('.hand_aligned'))
        except IOError:
            hand_alignments = []

        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT,
                            verbose=args.verbose)
        #ref_graphs = pickle.load(open('./data/ref_graph.p','rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        n_correct_tag_total = .0
        n_parsed_tag_total = 0.
        n_gold_tag_total = .0

        gold_amr = []
        aligned_instances = []
        for instance in train_instances[begin:]:

            if hand_alignments and instance.comment[
                    'id'] not in hand_alignments:
                continue
            state = amr_parser.testOracleGuide(instance, start_step)
            n_correct_arc, n1, n_parsed_arc, n_gold_arc, n_correct_tag, n_parsed_tag, n_gold_tag = state.evaluate(
            )
            if n_correct_arc != n1:
                import pdb
                pdb.set_trace()
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc / n_parsed_arc if n_parsed_arc else .0
            r = n_correct_arc / n_gold_arc if n_gold_arc else .0
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2:
                print >> sys.stderr, "Precision: %s Recall: %s  %s\n" % (
                    p, r, indicator)
            n_correct_tag_total += n_correct_tag
            n_parsed_tag_total += n_parsed_tag
            n_gold_tag_total += n_gold_tag
            p1 = n_correct_tag / n_parsed_tag if n_parsed_tag else .0
            r1 = n_correct_tag / n_gold_tag if n_gold_tag else .0
            if args.verbose > 2:
                print >> sys.stderr, "Tagging Precision:%s Recall:%s" % (p1,
                                                                         r1)

            instance.comment['alignments'] +=\
                ''.join(' %s-%s|%s' % (idx-1, idx, instance.amr.get_pid(state.A.abt_node_table[idx]))
                        for idx in state.A.abt_node_table if isinstance(idx,int))

            aligned_instances.append(instance)
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
        pt = n_correct_total / n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total / n_gold_total if n_gold_total != .0 else .0
        ft = 2 * pt * rt / (pt + rt) if pt + rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr, aligned_instances, amr_file,
                         'pseudo-gold', hand_alignments)
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt, rt, ft)

        tp = n_correct_tag_total / n_parsed_tag_total if n_parsed_tag_total != .0 else .0
        tr = n_correct_tag_total / n_gold_tag_total if n_gold_tag_total != .0 else .0
        print "Tagging Precision:%s Recall:%s" % (tp, tr)

    elif args.mode == 'train':
        do_train(args)

    elif args.mode == 'parse':
        # actual parsing
        test_instances = preprocess(amr_file,
                                    start_corenlp=False,
                                    input_format=args.amrfmt,
                                    prp_format=args.prpfmt)
        if args.section != 'all':
            print "Choosing corpus section: %s" % (args.section)
            tcr = constants.get_corpus_range(args.section, 'test')
            test_instances = test_instances[tcr[0]:tcr[1]]

        #random.shuffle(test_instances)
        print >> experiment_log, "Loading model: ", args.model
        model = Model.load_model(args.model)
        parser = Parser(model=model,
                        oracle_type=DET_T2G_ORACLE_ABT,
                        action_type=args.actionset,
                        verbose=args.verbose,
                        elog=experiment_log)
        print >> experiment_log, "BEGIN PARSING"
        span_graph_pairs, results = parser.parse_corpus_test(test_instances)
        parsed_suffix = '%s.%s.parsed' % (args.section,
                                          args.model.split('.')[-2])
        write_parsed_amr(results,
                         test_instances,
                         amr_file,
                         suffix=parsed_suffix)

        print >> experiment_log, "DONE PARSING"
        if args.smatcheval:
            smatch_path = "./smatch_2.0.2/smatch.py"
            python_path = 'python'
            options = '--pr -f'
            parsed_filename = amr_file + '.' + parsed_suffix
            command = '%s %s %s %s %s' % (python_path, smatch_path, options,
                                          parsed_filename, amr_file)

            print 'Evaluation using command: ' + (command)
            print subprocess.check_output(command,
                                          stderr=subprocess.STDOUT,
                                          shell=True)

    elif args.mode == 'eval':
        '''break down error analysis'''
        # TODO: here use pickled file, replace it with parsed AMR and gold AMR
        span_graph_pairs = pickle.load(open(args.eval[0], 'rb'))
        instances = pickle.load(open(args.eval[1], 'rb'))

        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT,
                            verbose=args.verbose)
        error_stat = defaultdict(
            lambda: defaultdict(lambda: defaultdict(list)))
        for spg_pair, instance in zip(span_graph_pairs, instances):
            amr_parser.errorAnalyze(spg_pair[0], spg_pair[1], instance,
                                    error_stat)

    else:
        arg_parser.print_help()
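
The smatch call in the 'parse' branch is assembled as a single shell string and run with shell=True. An equivalent sketch that passes an argument list to subprocess avoids shell quoting; the file names here are placeholders, not paths from the repository.

import subprocess

# Same smatch invocation as above, expressed as an argument list (sketch only).
smatch_path = './smatch_2.0.2/smatch.py'
gold_file = 'data/test.amr'                     # the original AMR annotation file (placeholder)
parsed_file = gold_file + '.all.iter5.parsed'   # output of write_parsed_amr (placeholder)
command = ['python', smatch_path, '--pr', '-f', parsed_file, gold_file]
print subprocess.check_output(command, stderr=subprocess.STDOUT)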
Example #5
def main():
    '''
    usage = "Usage:%prog [options] amr_file"
    opt = OptionParser(usage=usage)
    opt.add_option("-v",action="store",dest="verbose",type='int',
                   default=0,help="set up verbose level")
    opt.add_option("-a",action="store_true",dest="align",
                   default=False,help="do alignment between sentence and amr")
    opt.add_option("-b",action="store",dest="begin",type='int',
                   default=0,help="for debugging"
                   "When do alignment, where the alignment begins"
                   "When test oracle, where to begin")
    opt.add_option("-s",action="store",dest="start_step",type='int',
                   default=0,help="where the step begins,for testing oracle")
    opt.add_option("-o",action="store",dest="sentfilep",
                   help="output sentences to file and parse the sentence into dependency graph")
    opt.add_option("-i",action="store",dest="parsedfilep",
                   help="read parsed dependency graph from file")
    opt.add_option("-g",action="store",dest="userActfile",
                   help="read user input action sequences as guide")
    opt.add_option("-d",action="store",dest="oracle",type='int',default=0,\
                   help="test the output actions of deterministic oracle: "
                         "1: tree oracle 2: list-based oracle")
    '''
    arg_parser = argparse.ArgumentParser(description="Brandeis transition-based AMR parser 1.0")
    
    arg_parser.add_argument('-v','--verbose',type=int,default=0,help='set up verbose level for debug')
    arg_parser.add_argument('-b','--begin',type=int,default=0,help='specify which sentence to begin the alignment or oracle testing for debug')
    arg_parser.add_argument('-s','--start_step',type=int,default=0,help='specify which step to begin oracle testing;for debug')
    #arg_parser.add_argument('-i','--input_file',help='the input: preprocessed data instances file for aligner or training')
    arg_parser.add_argument('-d','--dev',help='development file')
    arg_parser.add_argument('-as','--actionset',choices=['basic'],default='basic',help='choose different action set')
    arg_parser.add_argument('-m','--mode',choices=['preprocess','test_gold_graph','align','userGuide','oracleGuide','train','parse','eval'],help="preprocess:generate pos tag, dependency tree, ner\n" "align:do alignment between AMR graph and sentence string")
    arg_parser.add_argument('-dp','--depparser',choices=['stanford','stanfordConvert','stdconv+charniak','clear','mate','turbo'],default='stdconv+charniak',help='choose the dependency parser')
    arg_parser.add_argument('--coref',action='store_true',help='flag to enable coreference information')
    arg_parser.add_argument('--prop',action='store_true',help='flag to enable semantic role labeling information')
    arg_parser.add_argument('--model',help='specify the model file')
    arg_parser.add_argument('--feat',help='feature template file')
    arg_parser.add_argument('-iter','--iterations',default=1,type=int,help='training iterations')
    arg_parser.add_argument('amr_file',nargs='?',help='amr annotation file/input sentence file for parsing')
    arg_parser.add_argument('--amrfmt',action='store_true',help='specifying the input file is AMR annotation file')
    arg_parser.add_argument('-e','--eval',nargs=2,help='Error Analysis: give parsed AMR file and gold AMR file')
    arg_parser.add_argument('--section',choices=['proxy','all'],default='all',help='choose section of the corpus. Only works for LDC2014T12 dataset.')

    args = arg_parser.parse_args()

    amr_file = args.amr_file
    instances = None
    train_instance = None
    constants.FLAG_COREF=args.coref
    constants.FLAG_PROP=args.prop
    constants.FLAG_DEPPARSER=args.depparser

    # using corenlp to preprocess the sentences 
    if args.mode == 'preprocess':
        instances = preprocess(amr_file,START_SNLP=True,INPUT_AMR=args.amrfmt)
        print "Done preprocessing!"
    # preprocess the JAMR aligned amr
    elif args.mode == 'test_gold_graph':     
        instances = preprocess(amr_file,False)
        #instances = pickle.load(open('data/gold_edge_graph.pkl','rb'))
        gold_amr = []
        for inst in instances:
            GraphState.sent = inst.tokens
            gold_amr.append(GraphState.get_parsed_amr(inst.gold_graph))
        #pseudo_gold_amr = [GraphState.get_parsed_amr(inst.gold_graph) for inst in instances]
        write_parsed_amr(gold_amr,instances,amr_file,'abt.gold')
        #instances = preprocess_aligned(amr_file)
        print "Done output AMR!"
    # do alignment
    elif args.mode == 'align':

        if args.input_file:
            instances = pickle.load(open(args.input_file,'rb'))
        else:
            raise ValueError("Missing data file! specify it using --input or using preprocessing!")
        gold_instances_file = args.input_file.split('.')[0]+'_gold.p'

        print >> log, "Doing alignment..."

        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log','w')

        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin 
        counter = 1
        #for snt, amr in zip(snts[begin:],amrs[begin:]):
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt+'\n'
                
                print >> log, "AMR:"                
                print >> log, amr.to_amr_string()

            alresult = amr_aligner.apply_align(snt,amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr,alresult)
            #ref_graphs.append(ref_amr_graph)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                #print >> log, "Reference tuples:"
                #print >> log, ref_depGraph.print_tuples()
                print >> log, amr_aligner.print_align_result(alresult,amr)
                #raw_input('ENTER to continue')
            counter += 1

        pickle.dump(instances,open(gold_instances_file,'wb'),pickle.HIGHEST_PROTOCOL)
        #pickle.dump(ref_graphs,open('./data/ref_graph.p','wb'),pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close() 
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()
        
    # test user guide actions
    elif args.mode == 'userGuide':
        print 'Read in training instances...'
        train_instances = preprocess(amr_file,False)

        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])

        sys.exit()

    # test deterministic oracle 
    elif args.mode == 'oracleGuide':
        
        train_instances = preprocess(amr_file,START_SNLP=False)
        try:
            hand_alignments = load_hand_alignments(amr_file+str('.hand_aligned'))
        except IOError:
            hand_alignments = []


        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT,verbose=args.verbose)
        #ref_graphs = pickle.load(open('./data/ref_graph.p','rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        n_correct_tag_total = .0
        n_parsed_tag_total = 0.
        n_gold_tag_total = .0

        
        gold_amr = []
        aligned_instances = []
        #print "shuffling training instances"
        #random.shuffle(train_instances)
        for instance in train_instances[begin:]:
            
            if hand_alignments and instance.comment['id'] not in hand_alignments: continue
            state = amr_parser.testOracleGuide(instance,start_step)
            n_correct_arc,n1,n_parsed_arc, n_gold_arc,n_correct_tag,n_parsed_tag,n_gold_tag = state.evaluate()
            #assert n_correct_arc == n1
            if n_correct_arc != n1:
                import pdb
                pdb.set_trace()
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc/n_parsed_arc if n_parsed_arc else .0
            r = n_correct_arc/n_gold_arc if n_gold_arc else .0
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2: print >> sys.stderr, "Precision: %s Recall: %s  %s\n" % (p,r,indicator)
            n_correct_tag_total +=  n_correct_tag
            n_parsed_tag_total +=  n_parsed_tag
            n_gold_tag_total += n_gold_tag
            p1 = n_correct_tag/n_parsed_tag if n_parsed_tag else .0
            r1 = n_correct_tag/n_gold_tag if n_gold_tag else .0
            if args.verbose > 2: print >> sys.stderr,"Tagging Precision:%s Recall:%s" % (p1,r1)

            instance.comment['alignments'] += ''.join(' %s-%s|%s'%(idx-1,idx,instance.amr.get_pid(state.A.abt_node_table[idx])) for idx in state.A.abt_node_table if isinstance(idx,int))

            aligned_instances.append(instance)
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
            #gold_amr.append(instance.amr)
            #assert set(state.A.tuples()) == set(instance.gold_graph.tuples())
        pt = n_correct_total/n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total/n_gold_total if n_gold_total !=.0 else .0
        ft = 2*pt*rt/(pt+rt) if pt+rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr,aligned_instances,amr_file,'pseudo-gold',hand_alignments)
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt,rt,ft)

        tp = n_correct_tag_total/n_parsed_tag_total if n_parsed_tag_total != .0 else .0
        tr = n_correct_tag_total/n_gold_tag_total if n_gold_tag_total != .0 else .0
        print "Tagging Precision:%s Recall:%s" % (tp,tr)

        #amr_parser.record_actions('data/action_set.txt')
    elif args.mode == 'train': # training
        print "Parser Config:"
        print "Incorporate Coref Information: %s"%(constants.FLAG_COREF)
        print "Incorporate SRL Information: %s"%(constants.FLAG_PROP)
        print "Dependency parser used: %s"%(constants.FLAG_DEPPARSER)
        train_instances = preprocess(amr_file,START_SNLP=False)        
        if args.dev: dev_instances = preprocess(args.dev,START_SNLP=False)


        if args.section != 'all':
            print "Choosing corpus section: %s"%(args.section)
            tcr = constants.get_corpus_range(args.section,'train')
            train_instances = train_instances[tcr[0]:tcr[1]]
            if args.dev:
                dcr = constants.get_corpus_range(args.section,'dev')
                dev_instances = dev_instances[dcr[0]:dcr[1]]

        
        feat_template = args.feat if args.feat else None
        model = Model(elog=experiment_log)
        #model.output_feature_generator()
        parser = Parser(model=model,oracle_type=DET_T2G_ORACLE_ABT,action_type=args.actionset,verbose=args.verbose,elog=experiment_log)
        model.setup(action_type=args.actionset,instances=train_instances,parser=parser,feature_templates_file=feat_template)
        
        print >> experiment_log, "BEGIN TRAINING!"
        for iter in xrange(1,args.iterations+1):
            print >> experiment_log, "shuffling training instances"
            random.shuffle(train_instances)
            
            print >> experiment_log, "Iteration:",iter
            begin_updates = parser.perceptron.get_num_updates()
            parser.parse_corpus_train(train_instances)
            parser.perceptron.average_weight()
            #model.save_model(args.model+'-iter'+str(iter)+'-'+str(int(time.time()))+'.m')
            model.save_model(args.model+'-iter'+str(iter)+'.m')
            if args.dev:
                print >> experiment_log ,"Result on develop set:"                
                _,parsed_amr = parser.parse_corpus_test(dev_instances)
                write_parsed_amr(parsed_amr,dev_instances,args.dev,args.section+'.'+str(iter)+'.parsed')

        print >> experiment_log ,"DONE TRAINING!"
        
    elif args.mode == 'parse': # actual parsing
        test_instances = preprocess(amr_file,START_SNLP=False,INPUT_AMR=False)
        if args.section != 'all':
            print "Choosing corpus section: %s"%(args.section)
            tcr = constants.get_corpus_range(args.section,'test')
            test_instances = test_instances[tcr[0]:tcr[1]]
            
        #random.shuffle(test_instances)
        print >> experiment_log, "Loading model: ", args.model 
        model = Model.load_model(args.model)
        parser = Parser(model=model,oracle_type=DET_T2G_ORACLE_ABT,action_type=args.actionset,verbose=args.verbose,elog=experiment_log)
        print >> experiment_log ,"BEGIN PARSING"
        span_graph_pairs,results = parser.parse_corpus_test(test_instances)
        write_parsed_amr(results,test_instances,amr_file,suffix='%s.parsed'%(args.section))
        #write_span_graph(span_graph_pairs,test_instances,amr_file,suffix='spg.50')
        ################
        # for eval     #
        ################
        #pickle.dump(span_graph_pairs,open('data/eval/%s_spg_pair.pkl'%(amr_file),'wb'),pickle.HIGHEST_PROTOCOL)
        #pickle.dump(test_instances,open('data/eval/%s_instances.pkl'%(amr_file),'wb'),pickle.HIGHEST_PROTOCOL)
        print >> experiment_log ,"DONE PARSING"
        
        #plt.hist(results)
        #plt.savefig('result.png')

    elif args.mode == 'eval':
        '''break down error analysis'''
        # TODO: here use pickled file, replace it with parsed AMR and gold AMR
        span_graph_pairs = pickle.load(open(args.eval[0],'rb'))
        instances = pickle.load(open(args.eval[1],'rb'))
        
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT,verbose=args.verbose)
        error_stat = defaultdict(lambda:defaultdict(lambda:defaultdict(list)))
        for spg_pair,instance in zip(span_graph_pairs,instances):
            amr_parser.errorAnalyze(spg_pair[0],spg_pair[1],instance,error_stat)

    else:
        arg_parser.print_help()
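
constants.get_corpus_range(section, split) is used above as if it returned a (start, end) index pair for slicing the instance list. A toy sketch of that contract; the stub and the offsets below are made up for illustration and are not the real LDC2014T12 ranges.

# Hypothetical stand-in for constants.get_corpus_range (assumption, not the real code).
RANGES = {('proxy', 'train'): (0, 6603),
          ('proxy', 'dev'):   (0, 826),
          ('proxy', 'test'):  (0, 823)}

def get_corpus_range(section, split):
    return RANGES[(section, split)]

train_instances = range(10000)                 # stand-in for the preprocessed instance list
tcr = get_corpus_range('proxy', 'train')
train_instances = train_instances[tcr[0]:tcr[1]]
print len(train_instances)                     # -> 6603 with the placeholder range above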
Example #6
def main():
    '''
    usage = "Usage:%prog [options] amr_file"
    opt = OptionParser(usage=usage)
    opt.add_option("-v",action="store",dest="verbose",type='int',
                   default=0,help="set up verbose level")
    opt.add_option("-a",action="store_true",dest="align",
                   default=False,help="do alignment between sentence and amr")
    opt.add_option("-b",action="store",dest="begin",type='int',
                   default=0,help="for debugging"
                   "When do alignment, where the alignment begins"
                   "When test oracle, where to begin")
    opt.add_option("-s",action="store",dest="start_step",type='int',
                   default=0,help="where the step begins,for testing oracle")
    opt.add_option("-o",action="store",dest="sentfilep",
                   help="output sentences to file and parse the sentence into dependency graph")
    opt.add_option("-i",action="store",dest="parsedfilep",
                   help="read parsed dependency graph from file")
    opt.add_option("-g",action="store",dest="userActfile",
                   help="read user input action sequences as guide")
    opt.add_option("-d",action="store",dest="oracle",type='int',default=0,\
                   help="test the output actions of deterministic oracle: "
                         "1: tree oracle 2: list-based oracle")
    '''
    arg_parser = argparse.ArgumentParser(
        description="Brandeis transition-based AMR parser 1.0")

    arg_parser.add_argument('-v',
                            '--verbose',
                            type=int,
                            default=0,
                            help='set up verbose level for debug')
    arg_parser.add_argument(
        '-b',
        '--begin',
        type=int,
        default=0,
        help=
        'specify which sentence to begin the alignment or oracle testing for debug'
    )
    arg_parser.add_argument(
        '-s',
        '--start_step',
        type=int,
        default=0,
        help='specify which step to begin oracle testing for debug')
    #arg_parser.add_argument('-i','--input_file',help='the input: preprocessed data instances file for aligner or training')
    arg_parser.add_argument('-d', '--dev', help='development file')
    arg_parser.add_argument('-as',
                            '--actionset',
                            choices=['basic'],
                            default='basic',
                            help='choose different action set')
    arg_parser.add_argument(
        '-m',
        '--mode',
        choices=[
            'preprocess', 'test_gold_graph', 'align', 'userGuide',
            'oracleGuide', 'train', 'parse'
        ],
        help="preprocess:generate pos tag, dependency tree, ner\n"
        "align:do alignment between AMR graph and sentence")
    arg_parser.add_argument(
        '-dp',
        '--depparser',
        choices=['stanford', 'turbo', 'mate', 'malt', 'stdconv+charniak'],
        default='stanford',
        help='choose the dependency parser, default:{stanford}')
    arg_parser.add_argument('--model', help='specify the model file')
    arg_parser.add_argument('--feat', help='feature template file')
    arg_parser.add_argument('-iter',
                            '--iterations',
                            type=int,
                            help='training iterations')
    arg_parser.add_argument('amr_file',
                            nargs='?',
                            help='amr bank file for preprocessing')

    args = arg_parser.parse_args()

    amr_file = args.amr_file
    instances = None
    train_instance = None

    constants.FLAG_DEPPARSER = args.depparser

    # using corenlp to preprocess the sentences
    if args.mode == 'preprocess':
        instances = preprocess(amr_file)
        print >> experiment_log, "Done preprocessing!"
    # preprocess the JAMR aligned amr
    elif args.mode == 'test_gold_graph':
        instances = preprocess(amr_file, False)
        #instances = pickle.load(open('data/gold_edge_graph.pkl','rb'))
        pseudo_gold_amr = []
        for inst in instances:
            GraphState.sent = inst.tokens
            pseudo_gold_amr.append(GraphState.get_parsed_amr(inst.gold_graph))
        #pseudo_gold_amr = [GraphState.get_parsed_amr(inst.gold_graph) for inst in instances]
        write_parsed_amr(pseudo_gold_amr, instances, amr_file, 'gold')
        #instances = preprocess_aligned(amr_file)
        print "Done output AMR!"
    # do alignment
    elif args.mode == 'align':

        if args.input_file:
            instances = pickle.load(open(args.input_file, 'rb'))
        else:
            raise ValueError(
                "Missing data file! specify it using --input or using preprocessing!"
            )
        gold_instances_file = args.input_file.split('.')[0] + '_gold.p'

        print >> log, "Doing alignment..."

        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log', 'w')

        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin
        counter = 1
        #for snt, amr in zip(snts[begin:],amrs[begin:]):
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt + '\n'

                print >> log, "AMR:"
                print >> log, amr.to_amr_string()

            alresult = amr_aligner.apply_align(snt, amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr, alresult)
            #ref_graphs.append(ref_amr_graph)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                #print >> log, "Reference tuples:"
                #print >> log, ref_depGraph.print_tuples()
                print >> log, amr_aligner.print_align_result(alresult, amr)
                #raw_input('ENTER to continue')
            counter += 1

        pickle.dump(instances, open(gold_instances_file, 'wb'),
                    pickle.HIGHEST_PROTOCOL)
        #pickle.dump(ref_graphs,open('./data/ref_graph.p','wb'),pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close()
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()

    # test user guide actions
    elif args.mode == 'userGuide':
        print 'Read in training instances...'
        train_instances = preprocess(amr_file, False)

        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])

        sys.exit()

    # test deterministic oracle
    elif args.mode == 'oracleGuide':

        train_instances = preprocess(amr_file, False)

        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DETERMINE_TREE_TO_GRAPH_ORACLE_SC,
                            verbose=args.verbose)
        #ref_graphs = pickle.load(open('./data/ref_graph.p','rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        for instance in train_instances[begin:]:
            state = amr_parser.testOracleGuide(instance, start_step)
            n_correct_arc, n1, n_parsed_arc, n_gold_arc, _, _, _ = state.evaluate(
            )
            assert n_correct_arc == n1
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc / n_parsed_arc if n_parsed_arc else .0
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2:
                print >> sys.stderr, "Accuracy: %s  %s\n" % (p, indicator)
            #if instance.sentID == 704:
            #    import pdb
            #    pdb.set_trace()
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
            #assert set(state.A.tuples()) == set(instance.gold_graph.tuples())
        pt = n_correct_total / n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total / n_gold_total if n_gold_total != .0 else .0
        ft = 2 * pt * rt / (pt + rt) if pt + rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr, train_instances, amr_file,
                         'pseudo-gold')
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt, rt, ft)

        #amr_parser.record_actions('data/action_set.txt')
    elif args.mode == 'train':  # actual parsing
        train_instances = preprocess(amr_file, False)
        if args.dev: dev_instances = preprocess(args.dev, False)
        feat_template = args.feat if args.feat else None
        model = Model(elog=experiment_log)
        model.setup(action_type=args.actionset,
                    instances=train_instances,
                    feature_templates_file=feat_template)
        #model.output_feature_generator()
        parser = Parser(model=model,
                        action_type=args.actionset,
                        verbose=args.verbose,
                        elog=experiment_log)

        print >> experiment_log, "BEGIN TRAINING!"
        for iter in xrange(1, args.iterations + 1):
            print >> experiment_log, "shuffling training instances"
            random.shuffle(train_instances)

            print >> experiment_log, "Iteration:", iter
            begin_updates = parser.perceptron.get_num_updates()
            parser.parse_corpus_train(train_instances)
            parser.perceptron.average_weight()
            #model.save_model(args.model+'-iter'+str(iter)+'-'+str(int(time.time()))+'.m')
            model.save_model(args.model + '-iter' + str(iter) + '.m')
            if args.dev:
                print >> experiment_log, "Result on develop set:"
                parsed_amr = parser.parse_corpus_test(dev_instances)
                write_parsed_amr(parsed_amr, dev_instances, args.dev)

        print >> experiment_log, "DONE TRAINING!"

    elif args.mode == 'parse':
        test_instances = preprocess(amr_file, False)

        model = Model.load_model(args.model)
        parser = Parser(model=model,
                        action_type=args.actionset,
                        verbose=args.verbose,
                        elog=experiment_log)
        print >> experiment_log, "BEGIN PARSING"
        results = parser.parse_corpus_test(test_instances)
        write_parsed_amr(results, test_instances, amr_file)
        print >> experiment_log, "DONE PARSING"
        #pickle.dump(results,open('data/gold_edge_graph.pkl','wb'),pickle.HIGHEST_PROTOCOL)
        #plt.hist(results)
        #plt.savefig('result.png')
    else:
        arg_parser.print_help()
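
The evaluation path in the parsing examples prints the action confusion matrix after dividing it by 10. A small numpy sketch with a made-up 3x3 matrix reproduces that output and adds a row-normalized variant, which is often easier to read; both the size and the counts are placeholders.

import numpy as np

# Toy confusion matrix of predicted vs. oracle action classes (placeholder values).
cm = np.array([[50., 3., 1.],
               [4., 80., 6.],
               [0., 2., 30.]])

np.set_printoptions(suppress=True)
print np.round(np.divide(cm, 10))                      # what the code above prints: scaled counts
print np.round(cm / cm.sum(axis=1, keepdims=True), 2)  # each row sums to 1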
Example #7
    def parse_corpus_test(self, instances, do_evaluate=False):
        start_time = time.time()
        parsed_amr = []
        span_graph_pairs = []

        if do_evaluate:
            Parser.cm = np.zeros(shape=(len(GraphState.action_table),
                                        len(GraphState.action_table)))
            Parser.rtx = []
            Parser.rty = []
            Parser.steps = []

            n_correct_labeled_total = .0
            n_correct_total = .0
            n_parsed_total = .0
            n_gold_total = .0

            n_correct_tag_total = .0
            n_parsed_tag_total = .0
            brackets = defaultdict(set)
            results = []
            n_gold_tag_total = .0
            #cm_total = np.zeros(shape=(len(GraphState.action_table),len(GraphState.action_table)))
            #if WRITE_FAKE_AMR: out_fake_amr = open('data/fake_amr_triples.txt','w')

            for i, inst in enumerate(instances, 1):
                per_start_time = time.time()
                step, state = self.parse(inst, train=False)
                per_parse_time = round(time.time() - per_start_time, 3)

                Parser.rtx.append(len(inst.tokens))
                Parser.rty.append(per_parse_time)
                Parser.steps.append(step)

                n_correct_labeled_arc, n_correct_arc, n_parsed_arc, n_gold_arc, n_correct_tag, n_parsed_tag, n_gold_tag = state.evaluate(
                )

                p = n_correct_arc / n_parsed_arc if n_parsed_arc else .0
                r = n_correct_arc / n_gold_arc if n_gold_arc else .0
                f = 2 * p * r / (p + r) if p + r != .0 else .0

                n_correct_labeled_total += n_correct_labeled_arc
                n_correct_total += n_correct_arc
                n_parsed_total += n_parsed_arc
                n_gold_total += n_gold_arc

                n_correct_tag_total += n_correct_tag
                n_parsed_tag_total += n_parsed_tag
                n_gold_tag_total += n_gold_tag

                p1 = n_correct_arc / n_parsed_arc if n_parsed_arc != .0 else .0
                r1 = n_correct_arc / n_gold_arc
                f1 = 2 * p1 * r1 / (p1 + r1) if p1 + r1 != .0 else .0

                lp1 = n_correct_labeled_arc / n_parsed_arc if n_parsed_arc != .0 else .0
                lr1 = n_correct_labeled_arc / n_gold_arc
                lf1 = 2 * lp1 * lr1 / (lp1 + lr1) if lp1 + lr1 != .0 else .0

                tp1 = n_correct_tag / n_parsed_tag if n_parsed_tag != .0 else .0
                tr1 = n_correct_tag / n_gold_tag if n_gold_tag != .0 else .0

                score = (p1, r1, f1, lp1, lr1, lf1, tp1, tr1)

                parsed_amr.append(GraphState.get_parsed_amr(state.A))
                span_graph_pairs.append((state.A, state.gold_graph, score))
                print >> self.elog, "Done parsing sentence %s" % (state.sentID)

            print >> self.elog, "Parsing on %s instances " \
                                "takes %s" % (str(i), datetime.timedelta(seconds=round(time.time()-start_time, 0)))
            p = n_correct_total / n_parsed_total if n_parsed_total != .0 else .0
            r = n_correct_total / n_gold_total
            f = 2 * p * r / (p + r)
            print >> self.elog, "Unlabeled Precision:%s Recall:%s F1:%s" % (
                p, r, f)

            lp = n_correct_labeled_total / n_parsed_total
            lr = n_correct_labeled_total / n_gold_total
            lf = 2 * lp * lr / (lp + lr)
            print >> self.elog, "Labeled Precision:%s Recall:%s F1:%s" % (
                lp, lr, lf)

            tp = n_correct_tag_total / n_parsed_tag_total
            tr = n_correct_tag_total / n_gold_tag_total
            print >> self.elog, "Tagging Precision:%s Recall:%s" % (tp, tr)

            print "Confusion matrix action class:"
            np.set_printoptions(suppress=True)
            print np.round(np.divide(Parser.cm, 10))
        else:
            for i, inst in enumerate(instances, 1):
                per_start_time = time.time()
                step, state = self.parse(inst, train=False)
                per_parse_time = round(time.time() - per_start_time, 3)
                parsed_amr.append(GraphState.get_parsed_amr(state.A))
                if self.verbose > 1:
                    print >> self.elog, "Done parsing sentence %s" % (
                        state.sentID)

            print >> self.elog, "Parsing on %s instances takes %s" % (
                str(i),
                datetime.timedelta(seconds=round(time.time() - start_time, 0)))

        return span_graph_pairs, parsed_amr
def main():
    '''
    usage = "Usage:%prog [options] amr_file"
    opt = OptionParser(usage=usage)
    opt.add_option("-v",action="store",dest="verbose",type='int',
                   default=0,help="set up verbose level")
    opt.add_option("-a",action="store_true",dest="align",
                   default=False,help="do alignment between sentence and amr")
    opt.add_option("-b",action="store",dest="begin",type='int',
                   default=0,help="for debugging"
                   "When do alignment, where the alignment begins"
                   "When test oracle, where to begin")
    opt.add_option("-s",action="store",dest="start_step",type='int',
                   default=0,help="where the step begins,for testing oracle")
    opt.add_option("-o",action="store",dest="sentfilep",
                   help="output sentences to file and parse the sentence into dependency graph")
    opt.add_option("-i",action="store",dest="parsedfilep",
                   help="read parsed dependency graph from file")
    opt.add_option("-g",action="store",dest="userActfile",
                   help="read user input action sequences as guide")
    opt.add_option("-d",action="store",dest="oracle",type='int',default=0,\
                   help="test the output actions of deterministic oracle: "
                         "1: tree oracle 2: list-based oracle")
    '''
    arg_parser = argparse.ArgumentParser(description="Brandeis transition-based AMR parser 1.0")
    
    arg_parser.add_argument('-v','--verbose',type=int,default=0,help='set up verbose level for debug')
    arg_parser.add_argument('-b','--begin',type=int,default=0,help='specify which sentence to begin the alignment or oracle testing for debug')
    arg_parser.add_argument('-s','--start_step',type=int,default=0,help='specify which step to begin oracle testing for debug')
    #arg_parser.add_argument('-i','--input_file',help='the input: preprocessed data instances file for aligner or training')
    arg_parser.add_argument('-d','--dev',help='development file')
    arg_parser.add_argument('-as','--actionset',choices=['basic'],default='basic',help='choose different action set')
    arg_parser.add_argument('-m','--mode',choices=['preprocess','test_gold_graph','align','userGuide','oracleGuide','train','parse'],help="preprocess:generate pos tag, dependency tree, ner\n" "align:do alignment between AMR graph and sentence")
    arg_parser.add_argument('-dp','--depparser',choices=['stanford','turbo','mate','malt','stdconv+charniak'],default='stanford',help='choose the dependency parser, default:{stanford}')
    arg_parser.add_argument('--model',help='specify the model file')
    arg_parser.add_argument('--feat',help='feature template file')
    arg_parser.add_argument('-iter','--iterations',type=int,help='training iterations')
    arg_parser.add_argument('amr_file',nargs='?',help='amr bank file for preprocessing')
    

    args = arg_parser.parse_args()

    amr_file = args.amr_file
    instances = None
    train_instance = None

    constants.FLAG_DEPPARSER=args.depparser

    # using corenlp to preprocess the sentences 
    if args.mode == 'preprocess':
        instances = preprocess(amr_file)
        print >> experiment_log, "Done preprocessing!"
    # preprocess the JAMR aligned amr
    elif args.mode == 'test_gold_graph':     
        instances = preprocess(amr_file,False)
        #instances = pickle.load(open('data/gold_edge_graph.pkl','rb'))
        pseudo_gold_amr = []
        for inst in instances:
            GraphState.sent = inst.tokens
            pseudo_gold_amr.append(GraphState.get_parsed_amr(inst.gold_graph))
        #pseudo_gold_amr = [GraphState.get_parsed_amr(inst.gold_graph) for inst in instances]
        write_parsed_amr(pseudo_gold_amr,instances,amr_file,'gold')
        #instances = preprocess_aligned(amr_file)
        print "Done output AMR!"
    # do alignment
    elif args.mode == 'align':

        if args.input_file:
            instances = pickle.load(open(args.input_file,'rb'))
        else:
            raise ValueError("Missing data file! specify it using --input or using preprocessing!")
        gold_instances_file = args.input_file.split('.')[0]+'_gold.p'

        print >> log, "Doing alignment..."

        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log','w')

        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin 
        counter = 1
        #for snt, amr in zip(snts[begin:],amrs[begin:]):
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt+'\n'
                
                print >> log, "AMR:"                
                print >> log, amr.to_amr_string()

            alresult = amr_aligner.apply_align(snt,amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr,alresult)
            #ref_graphs.append(ref_amr_graph)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                #print >> log, "Reference tuples:"
                #print >> log, ref_depGraph.print_tuples()
                print >> log, amr_aligner.print_align_result(alresult,amr)
                #raw_input('ENTER to continue')
            counter += 1

        pickle.dump(instances,open(gold_instances_file,'wb'),pickle.HIGHEST_PROTOCOL)
        #pickle.dump(ref_graphs,open('./data/ref_graph.p','wb'),pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close() 
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()
        
    # test user guide actions
    elif args.mode == 'userGuide':
        print 'Read in training instances...'
        train_instances = preprocess(amr_file,False)

        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])

        sys.exit()

    # test deterministic oracle 
    elif args.mode == 'oracleGuide':
        
        train_instances = preprocess(amr_file,False)

        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DETERMINE_TREE_TO_GRAPH_ORACLE_SC,verbose=args.verbose)
        #ref_graphs = pickle.load(open('./data/ref_graph.p','rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        for instance in train_instances[begin:]:
            state = amr_parser.testOracleGuide(instance,start_step)
            n_correct_arc,n1,n_parsed_arc, n_gold_arc,_,_,_ = state.evaluate()
            assert n_correct_arc == n1
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc/n_parsed_arc if n_parsed_arc else .0
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2: print >> sys.stderr, "Accuracy: %s  %s\n" % (p,indicator)
            #if instance.sentID == 704:
            #    import pdb
            #    pdb.set_trace()
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
            #assert set(state.A.tuples()) == set(instance.gold_graph.tuples())
        pt = n_correct_total/n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total/n_gold_total if n_gold_total !=.0 else .0
        ft = 2*pt*rt/(pt+rt) if pt+rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr,train_instances,amr_file,'pseudo-gold')
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt,rt,ft)

        #amr_parser.record_actions('data/action_set.txt')
    elif args.mode == 'train': # actual parsing
        train_instances = preprocess(amr_file,False)
        if args.dev: dev_instances = preprocess(args.dev,False)
        feat_template = args.feat if args.feat else None
        model = Model(elog=experiment_log)
        model.setup(action_type=args.actionset,instances=train_instances,feature_templates_file=feat_template)
        #model.output_feature_generator()
        parser = Parser(model=model,action_type=args.actionset,verbose=args.verbose,elog=experiment_log)
        
        print >> experiment_log, "BEGIN TRAINING!"
        for iter in xrange(1,args.iterations+1):
            print >> experiment_log, "shuffling training instances"
            random.shuffle(train_instances)
            
            print >> experiment_log, "Iteration:",iter
            begin_updates = parser.perceptron.get_num_updates()
            parser.parse_corpus_train(train_instances)
            parser.perceptron.average_weight()
            #model.save_model(args.model+'-iter'+str(iter)+'-'+str(int(time.time()))+'.m')
            model.save_model(args.model+'-iter'+str(iter)+'.m')
            if args.dev:
                print >> experiment_log ,"Result on develop set:"                
                parsed_amr = parser.parse_corpus_test(dev_instances)
                write_parsed_amr(parsed_amr,dev_instances,args.dev)

        print >> experiment_log ,"DONE TRAINING!"
        
    elif args.mode == 'parse':        
        test_instances = preprocess(amr_file,False)

        model = Model.load_model(args.model)
        parser = Parser(model=model,action_type=args.actionset,verbose=args.verbose,elog=experiment_log)
        print >> experiment_log ,"BEGIN PARSING"
        results = parser.parse_corpus_test(test_instances)
        write_parsed_amr(results,test_instances,amr_file)
        print >> experiment_log ,"DONE PARSING"
        #pickle.dump(results,open('data/gold_edge_graph.pkl','wb'),pickle.HIGHEST_PROTOCOL)
        #plt.hist(results)
        #plt.savefig('result.png')
    else:
        arg_parser.print_help()
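
The training and parsing branches above share a file-naming convention: each iteration's model is saved as '<model>-iter<N>.m', and the fuller main() in Example #4 derives the parse-output suffix from the model file name via args.model.split('.')[-2]. A runnable sketch of that string handling, with placeholder names only:

# Placeholder names; this only illustrates the naming convention used above.
model_prefix = 'amr-parser'
for it in range(1, 4):
    print model_prefix + '-iter' + str(it) + '.m'      # amr-parser-iter1.m ... amr-parser-iter3.m

amr_file = 'data/test.amr'
model_file = 'amr-parser-iter3.m'
section = 'all'
parsed_suffix = '%s.%s.parsed' % (section, model_file.split('.')[-2])
print amr_file + '.' + parsed_suffix                   # data/test.amr.all.amr-parser-iter3.parsed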