Example #1
0
def mainline(train=False):
    """Run the feature-extraction / SVM pipeline.

    When ``train`` is True, features are generated from the training XML
    directory and an SVM model is trained on them; otherwise features are
    generated from the test directory and the stored model is evaluated.
    """
    datadir = DIR["BASE"] + "data/"
    if train is True:
        featurefile = datadir + "train-features.txt"
        xmldir = DIR["BASE"] + "demo/train/"
    else:
        featurefile = datadir + "test-features.txt"
        xmldir = DIR["BASE"] + "demo/test/"
    # Start from a clean feature file; the generators below append to it.
    deleteFiles([featurefile])
    # infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            # Single-argument print() is valid in both Python 2 and 3;
            # the original mixed statement and call forms.
            print(infile + " is being processed.")
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            # Best-effort: log the failure and keep processing the
            # remaining XML files.
            print("Some Exception in the main pipeline")
            print(str(type(e)))
            print(str(e))
            logging.exception("Something awfull !!")
    model = DIR["DATA"] + "sec-tfidf-model.txt"
    if train is False:
        # TESTING
        outfile = DIR["DATA"] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR["DATA"] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
Example #2
0
def mainline(train=False):
    """Run the feature-extraction / SVM pipeline.

    When ``train`` is True, features are generated from the training XML
    directory and an SVM model is trained on them; otherwise features are
    generated from the test directory and the stored model is evaluated.
    """
    datadir = DIR['BASE'] + "data/"
    if train is True:
        featurefile = datadir + 'train-features.txt'
        xmldir = DIR['BASE'] + "demo/train/"
    else:
        featurefile = datadir + 'test-features.txt'
        xmldir = DIR['BASE'] + "demo/test/"
    # Start from a clean feature file; the generators below append to it.
    deleteFiles([featurefile])
    #infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            # Single-argument print() is valid in both Python 2 and 3;
            # the original mixed statement and call forms.
            print(infile + " is being processed.")
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            # Best-effort: log the failure and keep processing the
            # remaining XML files.
            print("Some Exception in the main pipeline")
            print(str(type(e)))
            print(str(e))
            logging.exception("Something awfull !!")
    model = DIR['DATA'] + "sec-tfidf-model.txt"
    if train is False:
        # TESTING
        outfile = DIR['DATA'] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR['DATA'] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
Example #3
0
def pickleIt():
    """Serialize the module-level test_data into the pickle file under DIR["DATA"]."""
    target = DIR["DATA"] + "test-sentences-pickle"
    # Drop any stale pickle before writing a fresh snapshot.
    deleteFiles([target])
    with open(target, "wb") as out:
        pickle.dump(test_data, out)
Example #4
0
from random import choice
from analysis import analyze
from utilityFunctions import writeToFile, trainSvm, predictSvm, deleteFiles


# Load the previously pickled test sentences.
# NOTE(review): `global` at module level is a no-op; `pickle` and `DIR`
# are presumably imported elsewhere in this file — confirm.
picklefile = DIR['DATA'] + 'test-sentences-pickle'
with open(picklefile, 'rb') as pfile:
    global data
    data = pickle.load(pfile)

# File paths for the small-scale train/evaluate run.
datadir = DIR['BASE'] + "data/"
model = DIR['DATA'] + "sec-tfidf-model-small.txt"
featurefile = datadir + 'features-small.txt'
outfile = DIR['DATA'] + "sec-tfidf-train-out-small.txt"
resfile = DIR['DATA'] + "sec-tfidf-result-small.txt"
# Start from a clean slate; downstream steps append to these files.
deleteFiles([model, featurefile, outfile, resfile])

# Keys of the unpickled data, used below to sample train/test splits.
# NOTE(review): under Python 2 this is a list; under Python 3 it would be
# a dict view — the commented code copies it with list(bucket) anyway.
bucket = data.keys()

# Accumulators for per-split evaluation results.
precision = []
recall = []

all_sets = []

#for i in range(07):
#    train_set = list(bucket)
#    test_set = []
#    for k in range(11):
#        curr = choice(train_set)
#        test_set.append(curr)
#        train_set.remove(curr)
Example #5
0
def pickleIt():
    """Serialize the module-level test_data into the pickle file under DIR['DATA']."""
    target = DIR['DATA'] + 'test-sentences-pickle'
    # Drop any stale pickle before writing a fresh snapshot.
    deleteFiles([target])
    with open(target, 'wb') as out:
        pickle.dump(test_data, out)
Example #6
0
from Config import DIR
from random import choice
from analysis import analyze
from utilityFunctions import writeToFile, trainSvm, predictSvm, deleteFiles

# Load the previously pickled test sentences.
# NOTE(review): `global` at module level is a no-op; `pickle` is
# presumably imported elsewhere in this file — confirm.
picklefile = DIR['DATA'] + 'test-sentences-pickle'
with open(picklefile, 'rb') as pfile:
    global data
    data = pickle.load(pfile)

# File paths for the small-scale train/evaluate run.
datadir = DIR['BASE'] + "data/"
model = DIR['DATA'] + "sec-tfidf-model-small.txt"
featurefile = datadir + 'features-small.txt'
outfile = DIR['DATA'] + "sec-tfidf-train-out-small.txt"
resfile = DIR['DATA'] + "sec-tfidf-result-small.txt"
# Start from a clean slate; downstream steps append to these files.
deleteFiles([model, featurefile, outfile, resfile])

# Keys of the unpickled data, used below to sample train/test splits.
# NOTE(review): under Python 2 this is a list; under Python 3 it would be
# a dict view — the commented code copies it with list(bucket) anyway.
bucket = data.keys()

# Accumulators for per-split evaluation results.
precision = []
recall = []

all_sets = []

#for i in range(07):
#    train_set = list(bucket)
#    test_set = []
#    for k in range(11):
#        curr = choice(train_set)
#        test_set.append(curr)
#        train_set.remove(curr)