def main(): dataPrefix = "sim-data" try: printv("Attempting to load data...") timeAndEnergy, coreConfigs, maxIterations = preproc.readData( dataPrefix + "-processed.json") except IOError: # file does not exist printv("Failed to load existing data") printv("Parsing data and calculating energy...") timeAndEnergy, coreConfigs, maxIterations = preproc.parseAndCalcEnergy( filePrefix=dataPrefix, cleanData=False, iterations=27) printv("Writing data to disk...") preproc.writeData([timeAndEnergy, coreConfigs, maxIterations], dataPrefix + "-processed.json") printv("Cleaning data...") preproc.cleanupData(timeAndEnergy, maxStds=3) printv("Averaging data across iterations...") preproc.avgMatrix(timeAndEnergy) # avg all iterations printv("Creating and solving ILP model...") solMatrix = sim.solveConfigModel( timeAndEnergy, coreConfigs, logFilename='gurobi_sol_plot.log') # optimize model modelConstrTables(solMatrix) modelConstrTables(solMatrix, timeParam='construct')
def get_ngrams(n, mode="word"):
    training_tweets, training_users, training_target_values = readData(
        FLAGS.training_data_path)
    test_tweets, test_users, test_target_values = readData(
        FLAGS.test_data_path)
    training_tweets, training_users = preprocess(
        training_tweets, training_users, training_target_values, mode)
    test_tweets, test_users = preprocess(
        test_tweets, test_users, test_target_values, mode)
    training_ngrams = ngram_extract(training_tweets, n, mode="training")
    test_ngrams = ngram_extract(test_tweets, n, mode="test")
    return list(training_ngrams), list(training_users), list(
        test_ngrams), list(test_users)
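# A sketch of how get_ngrams() might be called, based only on its signature:
# extract word bigrams for both splits, then hand them to whatever model
# consumes them. The variable names here are illustrative, not from the source.
train_ngrams, train_users, test_ngrams, test_users = get_ngrams(2, mode="word")
print(len(train_ngrams), "training n-gram rows,", len(test_ngrams), "test rows")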
def textRankMain(input_file, n, m):
    global countWords, data, finaldata
    print("Running TextRank v2.0 on", input_file, "....")
    graph = Graph()
    data, finaldata, countWords = readData(input_file)
    graph.set_structure(data)
    graph.textRank()
    graph.sort_nodes_textrank(n)
    print("Finished TextRank v2.0")
    return graph.summarize(m)
def textSummarizeMain(input_file, m):
    global countWords, data, finaldata
    print("Running default TextRank on", input_file, "....")
    graph = Graph()
    data, finaldata, countWords = readData(input_file)
    # print(finaldata)
    # print(countWords)
    # input()
    countWords = len(finaldata)
    graph.set_structure(finaldata)
    graph.textSummarize()
    print("Finished TextRank.")
    return graph.sort_nodes_textsummarize(m)
def hitsMain(input_data, n):
    # Note: despite the name, the HITS calls below are commented out and this
    # function currently ranks nodes with TextRank.
    graph = Graph()
    # print(input_data)
    data = readData(input_data, 'english')
    # graph.set_structure([["d1", "d2"], ["d1", "d3"], ["d2", "d1"], ["d2", "d3"],
    #                      ["d3", "d2"], ["d3", "d4"], ["d4", "d2"]])
    # print(data)
    graph.set_structure(data)
    # graph.hubs_and_authorities()
    # graph.HITS()
    # answer = graph.sort_nodes_hits(n)
    graph.textRank()
    answer = graph.sort_nodes_textrank(n)
    result = []
    for item in answer:
        result.append(item.name)
    return result
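# A hypothetical call to hitsMain(). The readData(input_data, 'english') line
# suggests input_data is a path to an English-language text file; that reading,
# and the file name used here, are assumptions.
top_nodes = hitsMain("article.txt", 5)
for name in top_nodes:
    print(name)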
import preprocess
from config import config
import modelCNN
from utils import saveModel

if __name__ == '__main__':
    raw_data = preprocess.readData(config)
    sms_text, sms_label = preprocess.cleanData(raw_data)
    x_train, y_train, x_val, y_val = preprocess.categorical(
        sms_text, sms_label, config)
    embedding_layer = preprocess.train_dic(sms_text, config)
    clf = modelCNN.trainCNN(x_train, y_train, x_val, y_val, embedding_layer)
    saveModel(clf, "CNN", config)
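# The config object threaded through this pipeline is opaque here. Below is a
# purely hypothetical sketch of the kind of fields it might carry (paths,
# tokenizer limits, training hyperparameters); none of these names are
# confirmed by the source.
config_example = {
    "data_path": "data/sms.csv",    # assumed input location
    "max_sequence_length": 100,     # assumed tokenizer cap
    "embedding_dim": 128,           # assumed embedding width
    "validation_split": 0.2,        # assumed train/val ratio
    "model_dir": "models/",         # assumed save location
}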
# Fragment: assumes simi_pos, simi_neg, neg_num, relation_feature,
# relation_feature_neg, and new_relation_dict were computed earlier in the script.
import numpy as np
from configparser import ConfigParser

print('negative similarity computed...')
np.save('test_pre_neg.npy', simi_neg)
acc = np.sum(simi_pos > simi_neg) / simi_pos.shape[0]
print("relation pos>neg accuracy: " + str(acc))
index = 0
false_list = list()
true_list = list()
true_all = list()
all_set = set()
config = ConfigParser()
config.read('./config.ini')
data = readData(config.get('pre', 'test_filepath'))
relation = readRelation(config.get('pre', 'relation_filepath'))
for num, neg_index in neg_num:
    l = int(np.argmax(simi_neg[index:index + num]))  # index of the highest-scoring negative
    max_neg = relation_feature_neg[index + l]  # best-scoring candidate
    gold = relation_feature[index]
    if (max_neg == gold).all():  # does the best candidate match the gold answer?
        true_all.append(neg_index)
        # print(str(neg_index) + ",rel_right")
    else:
        false_list.append(neg_index)
        # print(str(neg_index) + ",rel_wrong")
        print(new_relation_dict[int(gold[0])] + "," +
              new_relation_dict[int(max_neg[0])])
    # advance to the next question's block of negatives (assumed: the original
    # fragment appears truncated and the slice above only makes sense if index moves)
    index += num
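# A small follow-up one might add after the loop to summarize the tallies this
# fragment collects; this metric line is an assumption, not part of the source.
hit_rate = len(true_all) / max(1, len(true_all) + len(false_list))
print("best candidate matched gold on {:.2%} of questions".format(hit_rate))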
# Fragment: the opening of this mapping was cut off; the head below is
# reconstructed from the readData(emotions, ...) call that follows.
import preprocess
from sklearn.neural_network import MLPClassifier

emotions = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}
emotion_arr = [
    'neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust',
    'surprised'
]
x_train, x_test, y_train, y_test = preprocess.readData(emotions,
                                                       dir_path,
                                                       test_size=0.2)
print((x_train.shape[0], x_test.shape[0]))
print('Features extracted: ', x_train.shape[1])

# Define a multilayer perceptron classifier
model = MLPClassifier(alpha=0.01,
                      batch_size=256,
                      epsilon=1e-08,
                      hidden_layer_sizes=(300, ),
                      learning_rate='adaptive',
                      max_iter=500)

# Train the model
model.fit(x_train, y_train)
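# A natural next step, not shown in the source: score the trained MLP on the
# held-out split with scikit-learn's accuracy metric.
from sklearn.metrics import accuracy_score

y_pred = model.predict(x_test)
print("Test accuracy: {:.2%}".format(accuracy_score(y_test, y_pred)))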
def main():
    # 0 indicates categorical, 1 indicates continuous scale
    dataType = Globals.DATATYPE
    # raw file is stored in the Data folder in the project
    df = readData(Globals.SOURCEFILE)
    for x in range(2):
        if x + 1 == 1:
            print("MISSING VALUES DROPPED !!")
            add = "(MISSING VALUES DROPPED)"
            # output file will be stored in the Data folder
            with open(Globals.OUTPUTFILE, 'w') as outfile:
                outfile.write(
                    "Missing Value Strategy : Drop the rows with Missing Values\n")
                outfile.write(
                    "============================================================\n")
            with open(Globals.SUMMARY, 'w') as summaryF:
                summaryF.write(
                    "Missing Value Strategy : Drop the rows with Missing Values\n")
                summaryF.write(
                    "============================================================\n")
            dm, missingFile = dealMissing(df, dataType, 1)
        else:
            print("MISSING VALUES REPLACED !! with central tendencies")
            add = "(MISSING VALUES REPLACED)"
            with open(Globals.OUTPUTFILE, 'a') as outfile:
                outfile.write(
                    "Missing Value Strategy : Replace continuous variables with mean/median and categorical with mode\n")
                outfile.write(
                    "================================================================================================\n")
            with open(Globals.SUMMARY, 'a') as summaryF:
                summaryF.write(
                    "Missing Value Strategy : Replace continuous variables with mean/median and categorical with mode\n")
                summaryF.write(
                    "================================================================================================\n")
            dm, missingFile = dealMissing(df, dataType, 2)
        for choice in range(2):
            choice = choice + 1
            if choice == 1:
                print("\nRunning Discrete Naive Bayesian {}".format(add))
                print("-----------------------------------------------------------------\n")
                # discretize the data and pass the all-categorical data file to
                # evaluateNB; last parameter 1 = call categoricalNB
                print("Discretizing continuous data:")
                print("-----------------------------")
                DisChoise = input(
                    "Do you want to give a custom range for one or more attributes?\n"
                    " Enter 1 for Yes\n Any other value will be treated as No\n"
                    "Please enter your choice: ")
                # input() returns a string in Python 3, so compare against "1"
                if DisChoise == "1":
                    _, filePath = makediscrete2(dm, dataType)
                else:
                    _, filePath = makediscrete3(dm, dataType)
                evaluateNB(filePath, dataType, 1)
            else:
                print("Running Gaussian Naive Bayesian {}".format(add))
                print("-----------------------------------------------------------------\n")
                # last parameter 2 = call GaussianNB
                evaluateNB(missingFile, dataType, 2)
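# The makediscrete helpers are not shown. As a generic illustration of what
# "discretizing continuous data" means here -- not the project's actual
# implementation -- equal-width binning with pandas looks like this:
import pandas as pd

def discretize_equal_width(df, column, bins=5):
    """Replace a continuous column with equal-width categorical bins."""
    df[column] = pd.cut(df[column], bins=bins)
    return df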
def main(): dataPrefix = "sim-data/sim-data" graphPrefix = "graphs/" doPerSiteComparisons = True doAvgViolins = False doSampleViolins = False doPerSampleComparisons = False # TODO fix this modelSolvingGraphs = False minMaxAvgCompGraphs = True try: printv("Attempting to load data...") timeAndEnergyRaw, coreConfigs, maxIterations = preproc.readData( dataPrefix + "-processed.json") except IOError: # file does not exist printv("Failed to load existing data") printv("Parsing data and calculating energy...") timeAndEnergyRaw, coreConfigs, maxIterations = preproc.parseAndCalcEnergy( filePrefix=dataPrefix, cleanData=False, iterations=27) printv("Writing data to disk...") preproc.writeData([timeAndEnergyRaw, coreConfigs, maxIterations], dataPrefix + "-processed.json") timeAndEnergyClean = deepcopy(timeAndEnergyRaw) timeAndEnergySet = deepcopy(timeAndEnergyRaw) printv("Cleaning data...") preproc.cleanupData(timeAndEnergyClean, maxStds=3) printv("Averaging data across iterations...") preproc.avgMatrix(timeAndEnergySet) # avg all iterations printv("Creating and solving averaged ILP model...") avgSolMatrix = sim.solveConfigModel( timeAndEnergySet, coreConfigs, logFilename='gurobi-logs/gurobi_avg_sol_plot.log') # optimize model if doPerSampleComparisons: solMatrixSamples = [] printv("Solving sample ILP models...") iterations = 27 for site in sites: printv(site) for coreConfig in coreConfigs: #printv("\t"+ coreConfig) for i in range(iterations): #printv("\t\t"+str(i)) logFile = 'gurobi-logs/gurobi_sol_plot_' + site + '_' + coreConfig + '_' + str( i) + '.log' solMatrixSamples.append( sim.solveConfigModel(preproc.extractIter( timeAndEnergyClean, i), coreConfigs, logFilename=logFile)) printv("Graphing per Sample site comparisons") graphCompAllSamples(timeAndEnergyRaw, solMatrixSamples, outputPrefix="graphs/", compType='energy', writeOut=False) if doSampleViolins: printv("Graphing Violin plots...") if doAvgViolins: printv("Graphing Averaged Violin plots...") for site in sites: printv(site) for coreConfig in coreConfigs: graphViolinPlot(timeAndEnergyRaw, avgSolMatrix, coreConfig, site=site, outputPrefix=graphPrefix + "Violins-Phases/", graphType='loadtime', writeOut=True) if doPerSiteComparisons: for site in sites: printv(site) printv("optimal") graphOptimal(timeAndEnergySet, coreConfigs, avgSolMatrix, outputPrefix=graphPrefix, site=site, writeOut=True) printv("relative comparison") graphRelComparison(timeAndEnergySet, avgSolMatrix, outputPrefix=graphPrefix, site=site, writeOut=True) printv("absolute comparison") graphAbsComparison(timeAndEnergySet, avgSolMatrix, outputPrefix=graphPrefix, site=site, writeOut=True) if modelSolvingGraphs: printv("model solving times") graphModelTime(avgSolMatrix, outputPrefix=graphPrefix, timeParam='optimize', writeOut=True) printv("model construction times") graphModelTime(avgSolMatrix, outputPrefix=graphPrefix, timeParam='construct', writeOut=True) if minMaxAvgCompGraphs: printv("Min Max Avg Loadtime Comparison") graphCompAllSites(timeAndEnergySet, avgSolMatrix, outputPrefix=graphPrefix, writeOut=True, compType='loadtime') printv("Min Max Avg Energy Comparison") graphCompAllSites(timeAndEnergySet, avgSolMatrix, outputPrefix=graphPrefix, writeOut=True, compType='energy') printv("Average Loadtime Comparison") graphAllSitesAverages(timeAndEnergySet, avgSolMatrix, outputPrefix=graphPrefix, writeOut=True, compType='loadtime') printv("Average Energy Comparison") graphAllSitesAverages(timeAndEnergySet, avgSolMatrix, outputPrefix=graphPrefix, writeOut=True, compType='energy')