Example #1
import logging
from glob import glob

# DIR, deleteFiles, getConnection, generateTrainFeatures, generateTestFeatures,
# trainSvm, predictSvm, extractValues, analyze and pickleIt are project helpers
# defined elsewhere in the same codebase.


def mainline(train=False):
    datadir = DIR["BASE"] + "data/"
    if train:
        featurefile = datadir + "train-features.txt"
        xmldir = DIR["BASE"] + "demo/train/"
    else:
        featurefile = datadir + "test-features.txt"
        xmldir = DIR["BASE"] + "demo/test/"
    # Start from a clean feature file.
    deleteFiles([featurefile])
    # infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    # Extract features from every section XML file in the directory.
    for infile in glob(xmldir + "*.xml"):
        try:
            print(infile + " is being processed.")
            if train:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print("Some exception in the main pipeline")
            print(str(type(e)))
            print(str(e))
            logging.exception("Something awful!")
    model = DIR["DATA"] + "sec-tfidf-model.txt"
    if not train:
        # TESTING
        outfile = DIR["DATA"] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR["DATA"] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
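
The function above is only defined, never invoked in the snippet. A minimal driver sketch (the __main__ guard below is an assumption, not part of the original example) would run the training pass and then the evaluation pass:

if __name__ == "__main__":
    # Hypothetical entry point: build the training features and model first,
    # then generate test features, predict and analyze the results.
    mainline(train=True)
    mainline(train=False)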
Example #2
from random import choice

# bucket, all_sets, data, featurefile, model, outfile, resfile, precision,
# recall, writeToFile, trainSvm, predictSvm, analyze and deleteFiles are
# defined earlier in the surrounding script.

# Randomly partition the keys in bucket into 7 folds of 11 keys each.
for i in range(7):
    fold = []
    for k in range(11):
        curr = choice(bucket)
        fold.append(curr)
        bucket.remove(curr)
    all_sets.append(fold)

# 7-fold cross-validation: train on six folds, test on the held-out fold.
for i in range(7):
    test_set = all_sets[i]
    train_set = []
    for fold in [all_sets[z] for z in range(7) if z != i]:
        train_set.extend(fold)
    # Write the training features, fit the SVM and score the training folds.
    for key in train_set:
        writeToFile(featurefile, data[key]['features'] + '\n', 'a')
    trainSvm(featurefile, model, gamma=1)
    predictSvm(featurefile, model, outfile)
    outstring = "Training Fold : " + str(i)
    print("************* " + outstring + " *************")
    analyze(featurefile, outfile, resfile, outstring)

    deleteFiles([featurefile, outfile])

    # Score the held-out fold and record its precision and recall.
    for key in test_set:
        writeToFile(featurefile, data[key]['features'] + '\n', 'a')
    predictSvm(featurefile, model, outfile)
    outstring = "Testing Fold : " + str(i)
    pre, rec = analyze(featurefile, outfile, resfile, outstring)
    precision.append(pre)
    recall.append(rec)
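
The cross-validation loop only accumulates per-fold scores in precision and recall. A short summary step such as the following (an assumed addition, not part of the original example) would report their averages over the 7 folds:

# Hypothetical summary of the folds collected above.
avg_precision = sum(precision) / len(precision)
avg_recall = sum(recall) / len(recall)
print("Average precision over 7 folds: " + str(avg_precision))
print("Average recall over 7 folds: " + str(avg_recall))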