def main(argv):
    param_fpath = argv[1]
    param_dic = dict()
    with open(param_fpath) as f:
        # parse "key = value" lines; stop at the first blank line / EOF
        for line in f:
            line = line.strip()
            if not line:
                break
            key, value = line.split('=', 1)
            param_dic[key.strip()] = value.strip()
    gen_method = param_dic['gen_method']
    corpus_word_path = param_dic['corpus_word_path']
    corpus_word_vector_path = param_dic['corpus_word_vector_path']
    utt_path_list = param_dic['utt_path_list'].split()
    output_fpath_list = param_dic['output_fpath_list'].split()

    task_cat_path = param_dic['task_cat_path']
    nontask_cat_path = param_dic['nontask_cat_path']

    # # ----- / STEP1: generate sentence vector & save to file / ----- #
    # # PARAM: sentence generation method: [mean | inner_prd]
    # gen_method = 'mean'
    # # gen_method = 'inner_prd'
    # # PARAM: server generated corpus word & word vector file paths
    # corpus_word_path = '../data/task_nontask_chat/ch2v_word_list'
    # corpus_word_vector_path = '../data/task_nontask_chat/ch2v_vector_list'
    # # PARAM: generate sentence vectors for task-correct dataset; task-error dataset; nontask-correct dataset, nontask-error dataset, chat dataset
    # utt_path_list = ['../data/utterances/task/task-correct.csv', '../data/utterances/task/task-error.csv', '../data/utterances/nontask/nontask-correct.csv', '../data/utterances/nontask/nontask-error.csv', '../data/chat/chatbot.txt']
    # output_fpath_list = ['../output/task_correct_sentence_vector_list', '../output/task_error_sentence_vector_list', '../output/nontask_correct_sentence_vector_list', '../output/nontask_error_sentence_vector_list', '../output/chat_sentence_vector_list']

    # ----- / STEP1: generate sentence vector files / ----- #
    for task_utt_fpath, output_fpath in zip(utt_path_list, output_fpath_list):
        gen_sentvec_file(corpus_word_path, corpus_word_vector_path, task_utt_fpath, output_fpath, gen_method)

    print('# Sentence vector generated #')
    # ----- / STEP2: run logistic regression / ----- #
    # PARAM
    # each line in a category file corresponds to the same line in its vector file
    # category paths
    # task_cat_path = '../data/utterances/task/task-category.csv'
    # nontask_cat_path = '../data/utterances/nontask/nontask-category.csv'

    # vector paths - the files generated by gen_sentvec.py
    senvec_fpath_dic = {
        'task_correct_vector_fpath': output_fpath_list[0],
        'task_error_vector_fpath': output_fpath_list[1],
        'nontask_correct_vector_fpath': output_fpath_list[2],
        'nontask_error_vector_fpath': output_fpath_list[3],
        'chat_vector_fpath': output_fpath_list[4],
    }

    print('LR result for correct dataset:')
    lr(task_cat_path, nontask_cat_path,
       senvec_fpath_dic['task_correct_vector_fpath'],
       senvec_fpath_dic['nontask_correct_vector_fpath'],
       senvec_fpath_dic['chat_vector_fpath'])
    print('LR result for error dataset:')
    lr(task_cat_path, nontask_cat_path,
       senvec_fpath_dic['task_error_vector_fpath'],
       senvec_fpath_dic['nontask_error_vector_fpath'],
       senvec_fpath_dic['chat_vector_fpath'])
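
A parameter file accepted by the parsing loop above might look like this (one key = value pair per line, list values separated by spaces; paths are taken from the commented-out defaults, so illustrative only):

gen_method = mean
corpus_word_path = ../data/task_nontask_chat/ch2v_word_list
corpus_word_vector_path = ../data/task_nontask_chat/ch2v_vector_list
utt_path_list = ../data/utterances/task/task-correct.csv ../data/utterances/task/task-error.csv ../data/utterances/nontask/nontask-correct.csv ../data/utterances/nontask/nontask-error.csv ../data/chat/chatbot.txt
output_fpath_list = ../output/task_correct_sentence_vector_list ../output/task_error_sentence_vector_list ../output/nontask_correct_sentence_vector_list ../output/nontask_error_sentence_vector_list ../output/chat_sentence_vector_list
task_cat_path = ../data/utterances/task/task-category.csv
nontask_cat_path = ../data/utterances/nontask/nontask-category.csv

gen_sentvec_file itself is defined elsewhere; for gen_method = 'mean' it presumably averages the word vectors of each utterance. A minimal sketch under that assumption (function name and behavior are guesses, not the project's code):

import numpy

def mean_sentence_vector(sentence, word2vec):
    # hypothetical 'mean' method: average the vectors of in-vocabulary words
    vecs = [word2vec[w] for w in sentence.split() if w in word2vec]
    return numpy.mean(vecs, axis=0) if vecs else None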
Example #2
def main():
    timers.start("dataload")
    train_pix, train_labels, test_pix, test_labels = load_train_data()
    timers.stop("dataload")
    # choose the classifier from the command line (default "nn")
    method = find_argv("method", "nn")
    if method == "pybrain":
        import nn_pybrain
        nn_pybrain.nn(train_pix, train_labels, test_pix, test_labels)
    elif method == "nn":
        import nn
        nn.nn2(train_pix, train_labels, test_pix, test_labels)
    elif method == "lr":
        import lr
        lr.lr(train_pix, train_labels, test_pix, test_labels)
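
find_argv is not shown here; a plausible stand-in that pulls "key=value" options from sys.argv with a default (an assumption, not the project's actual helper):

import sys

def find_argv(key, default):
    # hypothetical helper: return the value of a "key=value" command-line
    # argument, or the default when the key is absent
    for arg in sys.argv[1:]:
        if arg.startswith(key + "="):
            return arg.split("=", 1)[1]
    return default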
Example #3
def trainModel(training_data, data_model, alpha=.001):
    '''Use logistic regression to train weights for all fields in the data model'''
    labels = training_data['label']
    examples = training_data['distances']

    (weight, bias) = lr.lr(labels, examples, alpha)

    fields = sorted(data_model['fields'].keys())

    for (i, name) in enumerate(fields):
        data_model['fields'][name]['weight'] = float(weight[i])

    data_model['bias'] = bias

    return data_model
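
Every trainModel variant on this page assumes an lr.lr(labels, examples, alpha) helper that returns a (weight, bias) pair. A minimal sketch of such a function, assuming plain batch gradient descent on the logistic loss (the real lr module's implementation and extra parameters may differ):

import numpy

def lr(labels, examples, alpha, max_iter=1000):
    # sketch only: batch-gradient-descent logistic regression
    X = numpy.asarray(examples, dtype='f8')   # one row of distances per example
    y = numpy.asarray(labels, dtype='f8')     # 0/1 targets
    weight = numpy.zeros(X.shape[1])
    bias = 0.0
    for _ in range(max_iter):
        pred = 1.0 / (1.0 + numpy.exp(-(X.dot(weight) + bias)))  # sigmoid
        grad = pred - y                       # dLoss/dz for the log loss
        weight -= alpha * X.T.dot(grad) / len(y)
        bias -= alpha * grad.mean()
    return weight, bias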
Example #5
def trainModel(training_data, data_model, alpha=0.001):
    """
    Use logistic regression to train weights for all fields in the data model
    """

    labels = training_data["label"]
    examples = training_data["distances"]

    (weight, bias) = lr.lr(labels, examples, alpha)

    # relies on the iteration order of data_model["fields"] matching the
    # column order used to build the distance vectors
    for i, name in enumerate(data_model["fields"]):
        data_model["fields"][name]["weight"] = float(weight[i])

    data_model["bias"] = bias

    return data_model
Example #6
import numpy

def trainModel(training_data, data_model, alpha=.001):
    """
    Use logistic regression to train weights for all fields in the data model
    """

    # binarize the labels: 1 where the label is 'match', 0 otherwise
    labels = numpy.array(training_data['label'] == 'match', dtype='i4')
    examples = training_data['distances']

    (weight, bias) = lr.lr(labels, examples, alpha)

    for i, name in enumerate(data_model['fields']):
        data_model['fields'][name]['weight'] = float(weight[i])

    data_model['bias'] = bias

    return data_model
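
A hypothetical call to the variant above, assuming training_data is a numpy record array with 'label' and 'distances' fields and a two-field data_model (all data illustrative, and an lr module such as the sketch under Example #3 importable):

import numpy

training_data = numpy.zeros(4, dtype=[('label', 'U8'), ('distances', 'f4', (2,))])
training_data['label'] = ['match', 'distinct', 'match', 'distinct']
training_data['distances'] = [[0.1, 0.9], [0.8, 0.2], [0.2, 0.7], [0.9, 0.1]]

data_model = {'fields': {'name': {}, 'address': {}}, 'bias': None}
data_model = trainModel(training_data, data_model)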
Example #7
def trainModel(training_data, data_model, alpha=.001):

    labels = training_data['label']
    examples = training_data['field_distances']

    (weight, bias) = lr.lr(labels, examples, alpha)

    fields = sorted(data_model['fields'].keys())

    for i, name in enumerate(fields):
        data_model['fields'][name]['weight'] = float(weight[i])

    data_model['bias'] = bias

    return data_model
Example #8
import numpy

def trainModel(training_data, data_model, alpha=.001):

    (labels, fields, examples) = zip(*[(l, f, e) for (l, (f, e))
                                       in training_data])

    labels = numpy.array(labels, dtype='i4')
    examples = numpy.array(examples, dtype='f4')
    (weight, bias) = lr.lr(labels, examples, alpha)

    weights = dict(zip(fields[0], weight))
    for name in data_model['fields']:
        data_model['fields'][name]['weight'] = float(weights[name])

    data_model['bias'] = bias

    return data_model
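
The zip(*...) line above expects training_data as an iterable of (label, (field_names, distances)) pairs; with illustrative data:

training_data = [
    (1, (('name', 'address'), (0.1, 0.9))),
    (0, (('name', 'address'), (0.8, 0.2))),
]
labels, fields, examples = zip(*[(l, f, e) for (l, (f, e)) in training_data])
# labels   == (1, 0)
# fields   == (('name', 'address'), ('name', 'address'))
# examples == ((0.1, 0.9), (0.8, 0.2))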
Example #9
import sys

from metric import printErrorMetrics

from rf import rf
from rr import rr
from nn import nn
from lr import lr


if __name__ == '__main__':
    extract_dir = sys.argv[1]
    fnum = int(sys.argv[2])

    """ datasets and labels's size is fnum """
    datasets, labels = GetAllData(extract_dir, fnum, 'bfs', total_vertex_num=4900578, L=500000)
    # datasets, labels = GetAllData(extract_dir, fnum, 'bfs', total_vertex_num=65608366, L=10000000)

    """ ridge regression """
    sr, sl = rr(datasets, labels, fnum)

    """ neural network """
    sr, sl = nn(datasets, labels, fnum)

    """ linear regression """
    sr, sl = lr(datasets, labels, fnum)

    """ random forest """
    sr, sl = rf(datasets, labels, fnum)

    """ draw picture """
    sample_draw(sr, sl)
Example #10
import SVM
import lr
import Bayes
import LDA
LDA.LDA()
Bayes.Bayes()
SVM.svmwch()
lr.lr()
Example #11
    def __init__(self, labels, examples):
        # the extra positional arguments (0.01, 500, 5000, 0.01) are
        # hyperparameters of this lr variant; their meanings are not
        # shown in this snippet
        self.weights, self.bias = lr(labels, examples, 0.01, 500, 5000, 0.01)