Example #1
import logging
import os
import sys

import numpy as np

import preprocessing as pp  # assumed module name; the source only shows the pp alias


def main(argv):
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    
    data_file_name = argv[1]     # gzipped input data
    dict_file_name = argv[2]     # gzipped dictionary
    output_file_name = argv[3]   # output directory for the batch files
    max_len = int(argv[4])       # maximum sequence length
    sufix_len = int(argv[5])     # suffix length ("sufix" spelling kept from the source)
    files_no = int(argv[6])      # number of files to split the prepared data into

    data = pp.read_gzip_data(data_file_name)
    dictionary = pp.read_gzip_data(dict_file_name)
    output = prepareData(data, max_len, dictionary, sufix_len)  # prepareData is defined elsewhere in this module
    
    #output_file_name = '2_data_for_train/{0}-max_len={1}-sufix_len={2}-train_files_no={3}-dict_size={4}'.format(output_file_name, max_len, sufix_len, files_no, len(dictionary))

    if not os.path.exists(output_file_name):
        os.mkdir(output_file_name)

    data_size = len(output)
    batch_size = int(np.ceil(1.0 * data_size / files_no))  # examples per output file

    data_iterator = 0
    i = 1000  # files are numbered from 1000, presumably to keep names lexicographically sorted

    # write the prepared examples into files_no batch files inside the output directory
    while data_iterator < data_size:
        pp.save_gzip_data(os.path.join(output_file_name, '{0}.txt.gz'.format(i)),
                          output[data_iterator : min(data_iterator + batch_size, data_size)])
        data_iterator += batch_size
        i += 1
    logging.info('Finished...')
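
For reference, a hypothetical invocation (all file and directory names below are made up; argv[0] is the script name and is never read): the six positional arguments are the gzipped data file, the gzipped dictionary, the output directory, the maximum sequence length, the suffix length, and the number of output files.

main(['prepare_data.py', 'data.txt.gz', 'dict.txt.gz', '2_data_for_train/run1', '50', '3', '10'])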
Example #2
import logging
import sys

from keras.models import model_from_json

import preprocessing as pp  # assumed module name; the source only shows the pp alias


def main(argv):
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    data_file_name = argv[1]     # gzipped test data
    dict_file_name = argv[2]     # gzipped dictionary
    working_dir = argv[3]        # directory with model.json and weights.h5; needs a trailing slash
    seq_max_len = int(argv[4])   # maximum sequence length
    sufix_len = int(argv[5])     # suffix length ("sufix" spelling kept from the source)
    batch_size = 2304
    it_size = batch_size         # test vectors scored per outer-loop chunk

    logging.info('Reading model...')
    with open(working_dir + 'model.json', 'r') as model_file:
        json_model = model_file.read()
    model = model_from_json(json_model)

    logging.info('Reading weights...')
    model.load_weights(working_dir + 'weights.h5')

    dictionary = pp.read_gzip_data(dict_file_name)

    data = pp.read_gzip_data(data_file_name)
    # the original passed data_file_name here, leaving data unused;
    # prepareTestData presumably takes the loaded data like the other pp helpers
    test_data, test_data_count = pp.prepareTestData(data)

    # "pd.prepareData" in the original appears to be a typo for pp.prepareData
    test_data = pp.prepareData(test_data, seq_max_len, dictionary, sufix_len)
    pp.save_gzip_data(working_dir + 'test_data.txt.gz', test_data)
    test_data = pp.convertDataToKerasFormat(test_data, seq_max_len, dictionary)

    logging.info('Testing model on data...')
    full_size = len(test_data)
    it = 0
    res_win = 0
    res_los = 0
    res_inc = 0
    res_all = 0
    it_count = 0
    # score the test set in chunks of at most it_size vectors, advancing by whole
    # test cases (test_data_count[k] vectors each) so no case is split across chunks
    while it < full_size:
        starting_it_count = it_count
        it2 = it
        for one_count in range(it_count, len(test_data_count)):
            if it2 + test_data_count[one_count] > min(full_size, it + it_size):
                break
            else:
                it_count += 1
                it2 += test_data_count[one_count]
        tmp_test_data = test_data[it : it2]
        tmp_test_data_count = test_data_count[starting_it_count : it_count]
        # predict_proba is the older Keras Sequential API; newer versions use predict
        tmp_results = model.predict_proba(tmp_test_data, batch_size=batch_size)
        win, los, inc, al = testModel(tmp_test_data, tmp_results, tmp_test_data_count)  # testModel is defined elsewhere in this module
        res_win += win
        res_los += los
        res_inc += inc
        res_all += al
        it = it2 

    printFinalResults(res_win, res_los, res_inc, res_all)
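
For reference, a hypothetical invocation (all file and directory names below are made up; argv[0] is the script name and is never read). Note the trailing slash on the working directory, since the script concatenates 'model.json' and 'weights.h5' onto it directly.

main(['test_model.py', 'test_data.txt.gz', 'dict.txt.gz', '3_models/run1/', '50', '3'])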
Example #3
import logging
import sys

import preprocessing as pp  # assumed module name; the source only shows the pp alias


def main(argv):
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    file_name = argv[1]        # gzipped training data
    dict_name = argv[2]        # base name used in the output file name
    dict_size = int(argv[3])   # number of dictionary entries to keep
    sufix_len = int(argv[4])   # suffix length ("sufix" spelling kept from the source)

    output_file_name = '1_dicts/{0}-dict_size={1}-sufix_len={2}.txt.gz'.format(dict_name, dict_size, sufix_len)

    data = pp.read_gzip_data(file_name)
    dictionary = createDict(data, dict_size, sufix_len)  # createDict is defined elsewhere in this module
    pp.save_gzip_data(output_file_name, dictionary)
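
All three examples lean on pp.read_gzip_data and pp.save_gzip_data, whose implementations are not shown. A minimal sketch, assuming the helpers store one item per line in a gzip-compressed text file:

import gzip

def read_gzip_data(file_name):
    # one item per line in a gzip-compressed text file (assumed format)
    with gzip.open(file_name, 'rt') as f:
        return f.read().splitlines()

def save_gzip_data(file_name, data):
    with gzip.open(file_name, 'wt') as f:
        f.write('\n'.join(data))

Any representation works as long as save and read are inverses; the exact format does not matter to the scripts above.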