Beispiel #1
0
    def __init__(self, dataset="django"):
        """Load the pretrained tranX parser for ``dataset``.

        The option list that the tranX test mode would receive is built here
        and run through ``init_arg_parser()``. Options that are not listed
        below (``args.parser`` and ``args.cuda`` are read later) therefore
        carry whatever value ``init_arg_parser()`` produces for them.

        Args:
            dataset: Name of the pretrained model to load, either
                ``"django"`` or ``"conala"``.

        Raises:
            ValueError: If ``dataset`` is not one of the supported names.
        """
        supported = ("django", "conala")
        if dataset not in supported:
            # An unknown name used to fall through both branches and only
            # surface later as an UnboundLocalError on ``args``.
            raise ValueError(
                "Unsupported dataset %r; expected one of: %s"
                % (dataset, ", ".join(supported)))

        # Both datasets share one layout; only the dataset name varies.
        data_root = "src/external_repos/tranX/data"
        cli_args = [
            "--mode", "test",
            "--load_model", "%s/pretrained_models/%s.bin" % (data_root, dataset),
            "--beam_size", "15",
            "--test_file", "%s/%s/test.bin" % (data_root, dataset),
            "--save_decode_to", "0.test.decode",
            "--decode_max_time_step", "100",
            "--example_preprocessor", "%s_example_processor" % dataset,
        ]
        args = init_arg_parser().parse_args(cli_args)

        self.parser = StandaloneParser(args.parser,
                                       args.load_model,
                                       args.example_preprocessor,
                                       beam_size=args.beam_size,
                                       cuda=args.cuda)
Beispiel #2
0
def interactive_mode(args):
    """Run a read-parse-print loop on standard input.

    Builds a ``StandaloneParser`` from ``args``, then repeatedly prompts with
    ``Query:``, parses the stripped line with ``debug=True`` and prints the
    code of every hypothesis that comes back.

    Args:
        args: Object exposing the ``parser``, ``load_model``,
            ``example_preprocessor``, ``beam_size`` and ``cuda`` attributes.

    Returns:
        None, once standard input is exhausted.
    """
    print('Start interactive mode', file=sys.stderr)

    parser = StandaloneParser(args.parser,
                              args.load_model,
                              args.example_preprocessor,
                              beam_size=args.beam_size,
                              cuda=args.cuda)

    while True:
        try:
            utterance = input('Query:').strip()
        except EOFError:
            # input() raises EOFError at end of input (Ctrl-D, closed pipe).
            # That is the normal way to leave the loop, not a failure.
            break

        hypotheses = parser.parse(utterance, debug=True)

        for hyp_id, hyp in enumerate(hypotheses):
            print('------------------ Hypothesis %d ------------------' % hyp_id)
            print(hyp.code)
 def __init__(self, cuda=True, fields=None,
              model_file='src/external_repos/external-knowledge-codegen/best_pretrained_models/finetune.mined.retapi.distsmpl.dr0.3.lr0.001.lr_de0.5.lr_da15.beam15.seed0.mined_100000.intent_count100k_topk1_temp5.bin'):
     '''
     This uses the model from Frank Xu presented in:
     Incorporating External Knowledge through Pre-training for Natural Language to Code Generation

     It translates English to Python code

     cuda: forwarded to StandaloneParser as its ``cuda`` keyword.
     fields: dict with the keys 'input_field' and 'output_field'; it is
         stored on the instance as ``self.fields``. When omitted, a new
         {'input_field': 'input_text', 'output_field': 'pred_text'} dict
         is created for this instance.
     model_file: path passed to StandaloneParser as the model to load.
     '''
     if fields is None:
         # Build the default per instance: a dict literal in the signature
         # would be one shared object mutated by every instance.
         fields = {'input_field': 'input_text', 'output_field': 'pred_text'}
     self.fields = fields
     parser = 'default_parser'
     processor = 'conala_example_processor'
     beam_size = 15
     reranker_file = 'src/external_repos/external-knowledge-codegen/best_pretrained_models/reranker.conala.vocab.src_freq3.code_freq3.mined_100000.intent_count100k_topk1_temp5.bin'
     self.parser = StandaloneParser(parser,
                           model_file,
                           processor,
                           beam_size=beam_size,
                           cuda=cuda,
                           reranker_path=reranker_file)
Beispiel #4
0
class TranX_Prertrained_Translator():
    """Thin wrapper around a pretrained tranX ``StandaloneParser``.

    The wrapped parser is available as ``self.parser``.
    """

    def __init__(self, dataset="django"):
        """Load the pretrained tranX parser for ``dataset``.

        The option list that the tranX test mode would receive is built here
        and run through ``init_arg_parser()``. Options that are not listed
        below (``args.parser`` and ``args.cuda`` are read later) therefore
        carry whatever value ``init_arg_parser()`` produces for them.

        Args:
            dataset: Name of the pretrained model to load, either
                ``"django"`` or ``"conala"``.

        Raises:
            ValueError: If ``dataset`` is not one of the supported names.
        """
        supported = ("django", "conala")
        if dataset not in supported:
            # An unknown name used to fall through both branches and only
            # surface later as an UnboundLocalError on ``args``.
            raise ValueError(
                "Unsupported dataset %r; expected one of: %s"
                % (dataset, ", ".join(supported)))

        # Both datasets share one layout; only the dataset name varies.
        data_root = "src/external_repos/tranX/data"
        cli_args = [
            "--mode", "test",
            "--load_model", "%s/pretrained_models/%s.bin" % (data_root, dataset),
            "--beam_size", "15",
            "--test_file", "%s/%s/test.bin" % (data_root, dataset),
            "--save_decode_to", "0.test.decode",
            "--decode_max_time_step", "100",
            "--example_preprocessor", "%s_example_processor" % dataset,
        ]
        args = init_arg_parser().parse_args(cli_args)

        self.parser = StandaloneParser(args.parser,
                                       args.load_model,
                                       args.example_preprocessor,
                                       beam_size=args.beam_size,
                                       cuda=args.cuda)

    def raw_predict(self, src_str):
        """Return the code of the first hypothesis for ``src_str``.

        Args:
            src_str: Input string; surrounding whitespace is stripped
                before it is handed to the parser.

        Returns:
            The ``code`` attribute of the first hypothesis returned by
            ``self.parser.parse``.

        Raises:
            IndexError: If the parser returns no hypotheses.
        """
        utterance = src_str.strip()
        hypotheses = self.parser.parse(utterance, debug=False)

        pred_code_list = [hyp.code for hyp in hypotheses]
        return pred_code_list[0]

    def raw_batch_predict(self, batch_src_strs):
        """Apply ``raw_predict`` to each string and return the results as a list."""
        return [self.raw_predict(src_str) for src_str in batch_src_strs]
class CoNaLa_SOTA_Transform():
    def __init__(self, cuda=True, fields=None,
                 model_file='src/external_repos/external-knowledge-codegen/best_pretrained_models/finetune.mined.retapi.distsmpl.dr0.3.lr0.001.lr_de0.5.lr_da15.beam15.seed0.mined_100000.intent_count100k_topk1_temp5.bin'):
        '''
        This uses the model from Frank Xu presented in:
        Incorporating External Knowledge through Pre-training for Natural Language to Code Generation

        It translates English to Python code

        cuda: forwarded to StandaloneParser as its ``cuda`` keyword.
        fields: dict with the keys 'input_field' and 'output_field'; it is
            stored on the instance as ``self.fields``. When omitted, a new
            {'input_field': 'input_text', 'output_field': 'pred_text'} dict
            is created for this instance.
        model_file: path passed to StandaloneParser as the model to load.
        '''
        if fields is None:
            # Build the default per instance: a dict literal in the signature
            # would be one shared object mutated by every instance.
            fields = {'input_field': 'input_text', 'output_field': 'pred_text'}
        self.fields = fields
        parser = 'default_parser'
        processor = 'conala_example_processor'
        beam_size = 15
        reranker_file = 'src/external_repos/external-knowledge-codegen/best_pretrained_models/reranker.conala.vocab.src_freq3.code_freq3.mined_100000.intent_count100k_topk1_temp5.bin'
        self.parser = StandaloneParser(parser,
                                       model_file,
                                       processor,
                                       beam_size=beam_size,
                                       cuda=cuda,
                                       reranker_path=reranker_file)

    def __call__(self, samples):
        '''
        samples: [dict]: [{'input_text': "`foo` is an empty list"},...]
        returns: [dict]: [{'input_text': "`foo` is an empty list", 'pred_text': "foo = []"}]

        Each dict is updated in place and the same ``samples`` object is
        returned. When parsing raises IndexError or SyntaxError the output
        field is set to '' and the failure is printed instead of raised.
        '''
        input_key = self.fields['input_field']
        output_key = self.fields['output_field']
        for sample_obj in tqdm(samples, desc='Tranx:'):
            input_text = sample_obj[input_key].strip()
            try:
                hypotheses = self.parser.parse(input_text, debug=False)
                sample_obj[output_key] = hypotheses[0].code
            except (IndexError, SyntaxError):
                # Best effort: one bad sample must not abort the whole batch.
                sample_obj[output_key] = ''
                print("### ERROR ### input text:", input_text)
                print(sys.exc_info()[0])
        return samples
        actions_repr = [action.__repr__(True) for action in hyp.action_infos]

        hyp_entry = dict(id=hyp_id + 1,
                         value=hyp.code,
                         tree_repr=hyp.tree.to_string(),
                         score=hyp.rerank_score.item() if hasattr(
                             hyp, 'rerank_score') else hyp.score.item(),
                         actions=actions_repr)

        responses['hypotheses'].append(hyp_entry)

    return jsonify(responses)


if __name__ == '__main__':
    # Script entry point: read the parser configuration, build one
    # StandaloneParser per configured entry, then start the web app.
    args = init_arg_parser().parse_args()

    # Use a context manager so the config file handle is closed right away
    # instead of being left for the garbage collector.
    with open(args.config_file) as config_file:
        config_dict = json.load(config_file)

    for parser_id, config in config_dict.items():
        parser = StandaloneParser(
            parser_name=config['parser'],
            model_path=config['model_path'],
            example_processor_name=config['example_processor'],
            beam_size=config['beam_size'],
            reranker_path=config['reranker_path'],
            cuda=args.cuda)

        parsers[parser_id] = parser

    # NOTE(review): debug=True combined with host='0.0.0.0' looks like a
    # Flask app exposing its interactive debugger on every interface, which
    # allows remote code execution -- confirm this never runs on an
    # untrusted network. Behavior is intentionally left unchanged here.
    app.run(host='0.0.0.0', port=args.port, debug=True)