# Stdlib imports needed by mainline(); getConnection, generateTrainFeatures,
# generateTestFeatures, extractValues, etc. are assumed to be defined
# elsewhere in this module or imported alongside it.
from glob import glob
import logging


def mainline(train=False):
    """Run the feature-generation and SVM pipeline in train or test mode."""
    datadir = DIR["BASE"] + "data/"
    if train:
        featurefile = datadir + "train-features.txt"
        xmldir = DIR["BASE"] + "demo/train/"
    else:
        featurefile = datadir + "test-features.txt"
        xmldir = DIR["BASE"] + "demo/test/"
    deleteFiles([featurefile])
    # infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            print infile + " is being processed."
            if train:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print "Some Exception in the main pipeline"
            print str(type(e))
            print str(e)
            logging.exception("Something awful!")
    model = DIR["DATA"] + "sec-tfidf-model.txt"
    if not train:
        # TESTING
        outfile = DIR["DATA"] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR["DATA"] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
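# A minimal driver sketch (an assumption, not part of the original module):
# run the test pipeline by default, or the training pipeline when "--train"
# is passed on the command line.
if __name__ == "__main__":
    import sys
    mainline(train=("--train" in sys.argv))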
def pickleIt():
    """Persist the collected test sentences (test_data, built up elsewhere
    in the module during feature generation) for later experiments."""
    picklefile = DIR["DATA"] + "test-sentences-pickle"
    deleteFiles([picklefile])
    with open(picklefile, "wb") as pfile:
        pickle.dump(test_data, pfile)
from Config import DIR
from random import choice
from analysis import analyze
from utilityFunctions import writeToFile, trainSvm, predictSvm, deleteFiles
import pickle

# Reload the pickled test sentences produced by pickleIt().
picklefile = DIR["DATA"] + "test-sentences-pickle"
with open(picklefile, "rb") as pfile:
    data = pickle.load(pfile)

datadir = DIR["BASE"] + "data/"
model = DIR["DATA"] + "sec-tfidf-model-small.txt"
featurefile = datadir + "features-small.txt"
outfile = DIR["DATA"] + "sec-tfidf-train-out-small.txt"
resfile = DIR["DATA"] + "sec-tfidf-result-small.txt"
deleteFiles([model, featurefile, outfile, resfile])

bucket = data.keys()
precision = []
recall = []
all_sets = []
# for i in range(7):
#     train_set = list(bucket)
#     test_set = []
#     for k in range(11):
#         curr = choice(train_set)
#         test_set.append(curr)
#         train_set.remove(curr)
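# A hedged sketch of how the commented-out loop above could be completed,
# following its own structure: draw 7 random folds, each holding out 11
# papers for testing, and record each split in all_sets. The per-fold
# train/predict/analyze wiring is not preserved in this file, so the
# sketch stops at the split itself.
for i in range(7):
    train_set = list(bucket)
    test_set = []
    for k in range(11):
        curr = choice(train_set)
        test_set.append(curr)
        train_set.remove(curr)
    all_sets.append((train_set, test_set))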