Ejemplo n.º 1
0
import tensorflow as tf
import os
from common import Config, VocabType
from argparse import ArgumentParser
from file2vec import File2Vec
from model import Model
from sys import argv
from model_defs import models

# Directory scanned for datasets: every immediate sub-directory found here is
# passed to File2Vec as one dataset name.
dataset_dir = 'java_files/'

if __name__ == '__main__':
    # Get the model for this session. The first command-line argument is
    # converted to an int and used to look up an entry in `models`.
    if len(argv) < 2:
        raise SystemExit('usage: {} MODEL_INDEX'.format(
            argv[0] if argv else '<script>'))
    try:
        modelDef = models[int(argv[1])]
    except (ValueError, IndexError, KeyError):
        # Non-numeric argument, or an index/key that `models` does not have.
        raise SystemExit('invalid model index: {!r}'.format(argv[1]))
    print("\n\nRunning model:", modelDef['name'], '\n\n')
    config = Config.get_default_config(modelDef['location'])

    modelObj = Model(config, modelDef['name'])
    try:
        # Empty prediction call kept from the original script.
        # NOTE(review): presumably forces the model to be built/loaded before
        # the datasets are processed -- confirm against Model.predict.
        modelObj.predict([])
        print('Created model')

        # For each dataset in our collection of them, run the model on it.
        # Plain files inside dataset_dir are ignored; only directories count.
        for dataset in os.listdir(dataset_dir):
            if os.path.isdir(os.path.join(dataset_dir, dataset)):
                print("Processing dataset:", dataset)
                file2vec = File2Vec(config, modelObj, modelDef, dataset)
                file2vec.run()
    finally:
        # Always release the model session, even when a dataset run fails.
        modelObj.close_session()
Ejemplo n.º 2
0
                        required=False,
                        help="save target vectors in word2vec format")
    parser.add_argument('--export_code_vectors',
                        action='store_true',
                        required=False,
                        help="export code vectors for the given examples")
    parser.add_argument(
        '--release',
        action='store_true',
        help=
        'if specified and loading a trained model, release the loaded model for a lower model '
        'size.')
    parser.add_argument('--predict', action='store_true')
    args = parser.parse_args()

    config = Config.get_default_config(args)

    model = Model(config)
    print('Created model')
    if config.TRAIN_PATH:
        model.train()
    if args.save_w2v is not None:
        model.save_word2vec_format(args.save_w2v, source=VocabType.Token)
        print('Origin word vectors saved in word2vec text format in: %s' %
              args.save_w2v)
    if args.save_t2v is not None:
        model.save_word2vec_format(args.save_t2v, source=VocabType.Target)
        print('Target word vectors saved in word2vec text format in: %s' %
              args.save_t2v)
    if config.TEST_PATH and not args.data_path:
        eval_results = model.evaluate()
    parser.add_argument("--word",
                        help="choose word count: 1 - wordcount&cloud;",
                        type=int)
    parser.add_argument("--tag",
                        help="choose tag count: 1 - tagcount;",
                        type=int)
    parser.add_argument(
        "--summary",
        help="choose summary algorithm: 0 - LSA; 1 - LEX_RANK; \
    2 - TEXT_RANK; 3 - SUM_BASIC; 4 - TextTeaser; 5 - MMR",
        type=int)
    args = parser.parse_args()
    print(args)
    # check data is prepared
    term = args.term
    config = Config.get_default_config(term)
    if not fu.file_exist(config.Q_OUTPUT_FILE):
        print(config.Q_OUTPUT_FILE + ' File not exists...')
        sys.exit()

    raw_documents = fu.read_data(config.Q_OUTPUT_FILE)
    '''
        First Analysis Words and Tags 
    '''
    if args.word == 1:
        # count words
        wc = Word_count()
        wc.extract_words(raw_documents)
        # wc.plot(config.WORDCOUNT_PIC)
    if args.tag == 1:
        # count tags