def enumerateSPtuples_Translate(self, keyword=None, maxTweet=99999, reader=dataReader()): reader = reader c = reader.cursor_tweet(keyword) tweetContent = {} SPtuples = [] translator = googleTranslatror() cl = ClausIE.get_instance(jar_filename='./clausie/clausie.jar') for tweetIndex, item in enumerate(c): print tweetIndex + 1, ':' content = item['Content'].replace(u'\u200b', '') content = content.replace(u'\xa0', '') content = content.replace(u'\u5168\u6587', '') content = content.encode('utf-8') content = re.sub(r'#.*?#|@.*?\s|\[.*?\]|\s|【|】|全文|的秒拍视频|(|)|“|”', '', content) print content content = translator.translate_zhCN2en(content) print content if tweetIndex + 1 == maxTweet: break return tweetContent, SPtuples
def __init__(self):
    """Build one dataReader per KITTI drive sequence, keyed by sequence name."""
    sequences = (
        '2011_09_30_drive_0018_sync',
        '2011_09_26_drive_0096_sync',
        '2011_09_26_drive_0104_sync',
        '2011_09_26_drive_0117_sync',
        '2011_09_30_drive_0033_sync',
        '2011_10_03_drive_0034_sync',
        '2011_10_03_drive_0027_sync',
        '2011_09_30_drive_0028_sync',
        '2011_09_26_drive_0019_sync',
        '2011_09_26_drive_0020_sync',
        '2011_09_26_drive_0022_sync',
        '2011_09_26_drive_0023_sync',
        '2011_09_26_drive_0035_sync',
        '2011_09_26_drive_0036_sync',
        '2011_09_26_drive_0039_sync',
        '2011_09_26_drive_0046_sync',
        '2011_09_26_drive_0061_sync',
        '2011_09_26_drive_0064_sync',
        '2011_09_26_drive_0079_sync',
        '2011_09_26_drive_0086_sync',
    )
    # Map every sequence name to its own reader instance.
    self.data_reader = {seq: dataReader.dataReader(seq) for seq in sequences}
def __init__(self):
    """Build one dataReader per KITTI drive sequence, keyed by sequence name."""
    sequences = (
        '2011_09_30_drive_0018_sync',
        '2011_09_26_drive_0096_sync',
        '2011_09_26_drive_0104_sync',
        '2011_09_26_drive_0117_sync',
        '2011_09_30_drive_0033_sync',
    )
    # Map every sequence name to its own reader instance.
    self.data_reader = {seq: dataReader.dataReader(seq) for seq in sequences}
def __init__(self):
    """Configure the summarization trainer: readers, model choice and constants.

    Commented-out assignments are deliberate switch points for alternative
    corpora / models; leave them in place.
    """
    # Reader Object
    self.reader = dr.dataReader()
    self.manager = dm.dataManager()
    # Define data to read -- rebind to switch corpus (DUC by default).
    self.reader.readTrainingData = self.reader.readDUCData
    # self.reader.readTrainingData = self.reader.readBBCNews
    # self.reader.readTrainingData = self.reader.readReviews
    # Input
    # Output
    # Internal
    # Progress counter -- presumably persisted via PROGRESS_PATH; confirm.
    self.current_progress = 0
    # Training entry point -- sequence-to-sequence by default.
    self.modelTrain = self.sequenceToSequenceModelTrain
    #self.modelTrain = self.recursiveModelTrain
    # Constants -- change according to the training machine.
    self.NUMBER_OF_LSTM = 400
    self.NUMBER_OF_SAMPLE = 1375
    self.PROGRESS_PATH = "Data/progress.txt"
    self.TRAINING_DATA_PATH = "Data/training_data/DUC2007_Summarization_Documents/duc2007_testdocs/"
    # self.TRAINING_DATA_PATH = "Data/training_data/Reviews.csv"
    # self.GLOVE_WEIGHT_PATH = "Data/pre_trained_GloVe/glove.6B.100d.txt"
    self.MODEL_PATH = "Data/s2s.h5"
def enumerateWordSeg(self, keyword=None, maxTweet=99999, reader=dataReader()): reader = reader c = reader.cursor_tweet(keyword) tweetContent = [] for tweetIndex, item in enumerate(c): print tweetIndex + 1, ':' content = item['Content'].replace(u'\u200b', '') content = content.replace(u'\xa0', '') content = content.replace(u'\u5168\u6587', '') content = content.encode('utf-8') content = re.sub(r'#.*?#|@.*?\s|\[.*?\]|\s|【|】|全文|的秒拍视频|(|)|“|”', '', content) print content tweetContent.append(' '.join(self.segmentor.segment(content))) if tweetIndex + 1 == maxTweet: break return tweetContent
import dataReader
import datetime
import Solver
import numpy as np

# Input workbooks: meetings, people, and unavailability constraints.
filenames = [
    'C:/Users/Vanman/PycharmProjects/Planner/Planner test/møder.xlsx',
    'C:/Users/Vanman/PycharmProjects/Planner/Planner test/personer.xlsx',
    'C:/Users/Vanman/PycharmProjects/Planner/Planner test/Unavailability.xlsx'
]

data_reader = dataReader.dataReader()
data_reader.read_data(filenames)

solution = Solver.Solver().Solve(data_reader)


def get_date(row):
    """Convert a (Week, Day, Kvarter) row into a concrete datetime next year.

    The day starts at 09:00 and each 'Kvarter' is a 15-minute slot.
    """
    day_start = datetime.datetime.fromisocalendar(
        datetime.datetime.now().year + 1, row['Week'], row['Day'])
    return day_start + datetime.timedelta(
        minutes=9 * 60 + int(row['Kvarter'] * 15))


solution['Dato & Tid'] = solution.apply(get_date, axis=1)
# Drop the raw scheduling columns now that a real timestamp exists.
solution = solution.drop(
    ['num_meetings', 'days_between', 'Week', 'Day', 'Kvarter'], axis=1)
print(solution)
        # Tail of a validation method (its header lies above this chunk).
        # Treat missing labels as "no click" before scoring.
        yValidation.fillna(0, inplace=True)
        pred = self._model.predict_proba(xValidation)
        # Keep only the probability of the positive (click) class.
        pred = pred[:, 1]
        # Persist the per-impression click probabilities for later bidding.
        # NOTE(review): to_csv with a path returns None, so predDF is None.
        predDF = pd.DataFrame(pred, columns=["click"]).to_csv('pClick.csv')
        R_Square = r2_score(yValidation, pred)
        print(R_Square)


if __name__ == "__main__":
    # Dataset locations.
    trainset = "../we_data/train.csv"
    validationset = "../we_data/validation.csv"
    testset = "../we_data/test.csv"
    trainReader = dataReader(trainset)
    validationReader = dataReader(validationset)
    trainDF = trainReader.getDataFrame()
    validationDF = validationReader.getDataFrame()
    # One-hot features and click labels for the train/validation splits.
    xTrain = trainReader.getOneHotData()
    yTrain = trainReader.getDataFrame()['click']
    xValidation = validationReader.getOneHotData()
    yValidation = validationReader.getDataFrame()['click']
    pc = PredictClick()
    pc.trainModel(xTrain, yTrain)
    pc.validateModel(xValidation, yValidation)
    print('...')
countries = {'FRA'} #Mawimum number of locations (random selection among all selected countries) max_number_loc = 10 #Number of lead times to be studied (up to 91) nbr_leadTimes = 10 #Starting and ending time of training period ('YYYY-MM-DD HH:MM:SS') start_time = '2012-01-02 00:00:00' end_time = '2012-12-31 00:00:00' #Starting and ending time of testing period - when scenarios will be generated ('YYYY-MM-DD HH:MM:SS') fore_start_time = '2014-09-01 00:00:00' fore_end_time = '2014-09-10 00:00:00' #Use of the improved forecast model (0:no - 1:yes) - only relevant for wind case improv_forecast = 1 #Number of scenarios to be computed nb_scenarios = 50 ##CODE STRUCTURE - DON'T MODIFY IF ONLY USE import sys sys.path.insert(0, folder_code) from dataReader import dataReader from modelEstimation import modelEstimation from scenarioGeneration import scenarioGeneration, save_scenarios data = dataReader(countries,max_number_loc,renewable_type,data_type,start_time, end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_data) model = modelEstimation(data) scenarios = scenarioGeneration(model, data, improv_forecast, nb_scenarios) save_scenarios(scenarios, folder_output)
def getDataset(command, robotType, minTimesteps):
    """Return sensor data for runs matching *command* and *robotType*.

    Only runs with at least *minTimesteps* recorded timesteps are included.
    Rewritten without the stray C-style semicolons, matching the cleaned
    sibling version of this helper elsewhere in the project.
    """
    reader = dataReader.dataReader("rerunDB")
    return reader.retrieveSensorData(command=command,
                                     robotType=robotType,
                                     minTimesteps=minTimesteps)
def main():
    """Run the accuracy benchmark for every (method, dataset) pair.

    For each combination a CSV of per-run results is written, followed by
    an 'average' row; overall accuracies are printed at the end.
    """
    param = def_param()
    accuracy_final = 0
    accuracy_initial = 0
    time_final = 0
    for method in param.method_list:
        param.method = method
        for name in param.list_data:
            param.name_data = name
            """ create result file """
            direct = '/Users/messi/Documents/summer18/results/result_' + param.method + '_test111/'
            print(direct)
            file_name = direct + 'result_' + param.name_data + '.csv'
            if not os.path.exists(direct):
                os.makedirs(direct)
            # Write the CSV header row.
            with open(file_name, 'w') as fp:
                a = csv.writer(fp, delimiter=',')
                #row_new = [['run', 'Accuracy_init', 'Accuracy_final', 'num. iters', 'soltime']]
                row_new = [[
                    'run', 'Accuracy_init', 'Accuracy_final', 'num. iters',
                    'soltime', 'moment_time'
                ]]
                a.writerows(row_new)
            # Running means across runs (each term pre-divided by num_run).
            m_accuracy_init = 0
            m_accuracy = 0
            m_num_iters = 0
            m_sol_time = 0
            m_moment = 0
            # NOTE(review): the sibling AUC driver iterates
            # range(param.num_run); range(16, 18) here looks like a debug
            # leftover. The means above are divided by param.num_run, so
            # they are only correct when all num_run runs actually execute.
            for run in range(16, 18):
                # run = 15
                data = dataReader(param, run + 1)
                print(run)
                print(param.method)
                print(param.name_data)
                print('Optimization process:')
                # Time the solver itself.
                start_time = timeit.default_timer()
                w_final, num_iters = solver(param, data)
                end_time = timeit.default_timer()
                soltime_time = end_time - start_time
                accuracy_init = computeAccuracy(param, data, data.initw)
                print('Accuracy with initial w is: {}'.format(accuracy_init))
                accuracy = computeAccuracy(param, data, w_final)
                sol_time = soltime_time
                print(
                    'Accuracy after minimization is: {} with solution time :{}'
                    .format(accuracy, sol_time))
                #row_new = [[run, accuracy_init, accuracy, num_iters, sol_time]]
                row_new = [[
                    run, accuracy_init, accuracy, num_iters, sol_time,
                    data.soltime_time_mom
                ]]
                """ write result file """
                m_accuracy_init += accuracy_init / param.num_run
                m_accuracy += accuracy / param.num_run
                m_num_iters += num_iters / param.num_run
                m_sol_time += sol_time / param.num_run
                m_moment += data.soltime_time_mom / param.num_run
                # Append this run's row.
                with open(file_name, 'a', newline="") as fp:
                    a = csv.writer(fp, delimiter=',')
                    a.writerows(row_new)
                # After the last run, append the averaged row.
                if run == param.num_run - 1:
                    with open(file_name, 'a', newline="") as fp:
                        a = csv.writer(fp, delimiter=',')
                        row_new = [[
                            'average',
                            m_accuracy_init, m_accuracy, m_num_iters,
                            m_sol_time, m_moment
                        ]]
                        a.writerows(row_new)
                # Totals for the final summary print-out.
                accuracy_initial += accuracy_init
                accuracy_final += accuracy
                time_final += sol_time
    print('Total initial accuracy is: {}'.format(
        float(accuracy_initial) / param.num_run))
    print('FINAL accuracy is: {} with solution time: {}'.format(
        float(accuracy_final) / param.num_run,
        float(time_final) / param.num_run))
    print(w_final)
""" 读入csv格式的标签数据,转为dict :param file:标签数据 :return:label2idx """ # 最后一个是无法预测 label_name = defaultdict(str) # label_idx = defaultdict(lambda : -1) df = pd.read_csv(file) df = df.where(df.notnull(), 'null') for i, label in enumerate(df['0']): label_name[i] = label return label_name data = dataReader('./data/month1-6-v8-label-greater200/month6-test-half.json') label_name = read_label_noName( "./data/month1-6-v8-label-greater200/label/label1.csv") @app.route('/getJson', methods=['GET']) def get_json(): idx = request.args.get('idx') return jsonify(data.getDataByID(idx)) @app.route('/predict', methods=['GET', 'POST']) def predict(): #接收json文件 sample = json.loads(request.get_data(as_text=True)) print(type(sample))
def getDataset(command, robotType, minTimesteps):
    """Return sensor data for runs matching *command* and *robotType*.

    Only runs with at least *minTimesteps* recorded timesteps are included.
    """
    db = dataReader.dataReader("rerunDB")
    return db.retrieveSensorData(command=command,
                                 robotType=robotType,
                                 minTimesteps=minTimesteps)
#Countries to be studied - see in documentation for list of countries keywords countries = {'FRA'} #Mawimum number of locations (random selection among all selected countries) max_number_loc = 10 #Number of lead times to be studied (up to 91) nbr_leadTimes = 10 #Starting and ending time of training period ('YYYY-MM-DD HH:MM:SS') start_time = '2012-01-02 00:00:00' end_time = '2012-12-31 00:00:00' #Starting and ending time of testing period - when scenarios will be generated ('YYYY-MM-DD HH:MM:SS') fore_start_time = '2014-09-01 00:00:00' fore_end_time = '2014-09-10 00:00:00' #Use of the improved forecast model (0:no - 1:yes) - only relevant for wind case improv_forecast = 1 #Number of scenarios to be computed nb_scenarios = 50 ##CODE STRUCTURE - DON'T MODIFY IF ONLY USE import sys sys.path.insert(0, folder_code) from dataReader import dataReader from modelEstimation import modelEstimation from scenarioGeneration import scenarioGeneration, save_scenarios data = dataReader(countries, max_number_loc, renewable_type, data_type, start_time, end_time, fore_start_time, fore_end_time, nbr_leadTimes, folder_data) model = modelEstimation(data) scenarios = scenarioGeneration(model, data, improv_forecast, nb_scenarios) save_scenarios(scenarios, folder_output)
               marker='.', color=c[i])
    # (The scatter call above starts before this chunk.)
    ax[4].set_title('Learnt Subspace')
    plt.tight_layout()
    if tosave:
        # Save the comparison figure instead of displaying it.
        fig.savefig('ClusteringCompare-ThreeBlobs.png')
        return 1
    else:
        # Show briefly without blocking, then return so batch runs proceed.
        plt.show(block=False)
        time.sleep(5)
        return 1


if __name__ == "__main__":
    folder = "./data"
    loader = dataReader(folder=folder)
    # Run the clustering comparison on every dataset in turn.
    testSpiral(loader)
    testPath(loader)
    testCompound(loader)
    testYeast(loader)
    testGlass(loader)
    # NOTE(review): testYeast also ran above -- confirm the repeat is intended.
    testYeast(loader)
    testEcoli(loader)
    testMove(loader)
    testCoil(loader)
    testStock(loader)
    testCoil20(loader)
    testEYB10(loader)
    testEYB20(loader)
def enumerteSPtuples(self, keyword=None, tweetNum=99999):
    """Extract (subject, predicate) tuples from tweets via the LTP pipeline.

    Each tweet is cleaned, sentence-split, then run through word
    segmentation, POS tagging, NER, dependency parsing and semantic role
    labelling; an SPtulpe is collected for every predicate that has an
    A0/A1/A2 argument.

    :param keyword: optional keyword filter for the tweet cursor.
    :param tweetNum: NOTE(review): accepted but never used -- the loop
        below breaks unconditionally after 20 tweets.
    :return: (tweetContent dict keyed by 1-based tweet index, list of SPtulpe)
    """
    reader = dataReader()
    c = reader.cursor_tweet(keyword)
    tweetIndex = 0
    tweetContent = {}
    SPtuples = []
    for index_, item in enumerate(c):
        tweetIndex += 1
        ''' Use for Tweet2016 data clean
        content = re.sub(r'<a.*?/a>|<i.*?/i>|<span.*?>|</span>|<br.*>|</br>|<img.*?>|#|\[.*?\]','',item['Content']+ ';' +item['TweetContent_repost'])
        content = re.sub(r'//:',';',content)
        END '''
        print tweetIndex, ':'
        # Strip zero-width spaces, NBSPs and the '全文' marker, then remove
        # hashtag/mention/bracket noise in one regex pass.
        content = item['Content'].replace(u'\u200b', '')
        content = content.replace(u'\xa0', '')
        content = content.replace(u'\u5168\u6587', '')
        content = content.encode('utf-8')
        content = re.sub(r'#.*?#|@.*?\s|\[.*?\]|\s|【|】|全文|的秒拍视频|(|)|“|”', '', content)
        print content
        sents = SentenceSplitter.split(content)
        tweetContent[tweetIndex] = content
        for sent in sents:
            words = self.segmentor.segment(sent)
            postags = self.postagger.postag(words)
            netags = self.recognizer.recognize(words, postags)  # named entity recognition
            arcs = self.parser.parse(words, postags)
            roles = self.labeller.label(words, postags, netags, arcs)  # semantic role labelling
            # Processing data print:
            # print sent
            # print list(words)[-2]
            # for j in range(0,len(words)):
            #     print j,'\t',
            # print
            # print '\t'.join(words)
            # print '\t'.join(postags)
            # print '\t'.join(netags)
            # print "\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs)
            # End
            for role in roles:
                # Keep only core arguments (A0/A1/A2) of each predicate.
                subject = [[arg.name, list(words[arg.range.start:arg.range.end + 1])]
                           for arg in role.arguments if arg.name in ['A0', 'A1', 'A2']]
                if len(subject) > 0:
                    print 'Predicate:', words[role.index], ":", 'Subject:',
                    # print words[role.index],":",role.arguments
                    p = ''.join(words[role.index])
                    s = ''
                    for r in subject:
                        # print r[0],':',
                        for text in r[1]:
                            print text,
                        print ';',
                        s = s + ' '.join(r[1])
                    print
                    # NOTE(review): 'SPtulpe' looks like a typo for 'SPtuple'
                    # but must match the class name defined elsewhere.
                    SPtuples.append(SPtulpe(p, s, tweetIndex))
        if index_ + 1 == 20:
            break
    return tweetContent, SPtuples
import numpy as np
from scipy.stats.stats import pearsonr
from scipy.stats.stats import spearmanr
from scipy.stats.stats import linregress
import matplotlib.pyplot as plt
import dataReader as read
import time

# Written to do a few informal looks at the data. Checks for correlation
# between features of the touch-sensor data and the reinforcement signals.
# (Cleanup: removed the stray C-style semicolons and normalized keyword
# spacing to match the sibling copy of this script.)

reader = read.dataReader("rerunDB")

robotType = "simple"
command = "jump"
dataset = reader.retrieveSensorData(robotType=robotType,
                                    command=command,
                                    minTimesteps=3000)

# Indices into the dataset tuple.
SENSOR_DATA = 0
REWARD_SIGNALS = 1

# Occurrence counts of the -1 and +1 reward signals.
print(dataset[1].count(-1))
print(dataset[1].count(1))

# Per robot: mark each timestep grounded (1) when any sensor reads 1,
# then record the mean grounded fraction.
meanGroundedTime = []
for robot in dataset[0]:
    robotGroundedTimesteps = []
    for row in robot:
        rowGrounded = 1 if 1 in row else 0
        robotGroundedTimesteps.append(rowGrounded)
    meanGroundedTime.append(np.mean(robotGroundedTimesteps))
def main():
    """Run the AUC benchmark for every (method, dataset) pair.

    Writes one CSV per dataset with per-run AUC/timing rows plus a final
    'average' row, and prints a per-dataset summary.
    """
    param = def_param()
    for method in param.method_list:
        param.method = method
        for name in param.list_data:
            param.name_data = name
            """ creat result file """
            direct = '/Users/messi/Documents/Year1/summer18/results_auc/result_' + param.method + '_test2/'
            file_name = direct + 'result_' + param.name_data + '.csv'
            if not os.path.exists(direct):
                os.makedirs(direct)
            # Per-dataset totals for the summary print-out.
            auc_final = 0
            auc_init = 0
            time_final = 0
            # Write the CSV header row.
            with open(file_name, 'w') as fp:
                a = csv.writer(fp, delimiter=',')
                row_new = [[
                    'run', 'auc_init', 'auc_final', 'num. iters', 'soltime',
                    'moment_time'
                ]]
                a.writerows(row_new)
            # Running means across runs (each term pre-divided by num_run).
            m_accuracy_init = 0
            m_accuracy = 0
            m_num_iters = 0
            m_sol_time = 0
            m_moment = 0
            for run in range(param.num_run):
                data = dataReader(param, run + 1)
                # Time the solver itself.
                start_time = timeit.default_timer()
                w_final, num_iters = solver(param, data)
                end_time = timeit.default_timer()
                soltime_time = end_time - start_time
                print('solution time is: {}'.format(soltime_time))
                fval_auc_init = computeAUC(param, data, data.initw)
                print('auc value with initial w is: {}'.format(fval_auc_init))
                fval_auc = computeAUC(param, data, w_final)
                sol_time = soltime_time
                print('auc value for this run is : {} with solution time :{}'.
                      format(fval_auc, sol_time))
                row_new = [[
                    run, fval_auc_init, fval_auc, num_iters, sol_time,
                    data.soltime_time_mom
                ]]
                m_accuracy_init += fval_auc_init / param.num_run
                m_accuracy += fval_auc / param.num_run
                m_num_iters += num_iters / param.num_run
                m_sol_time += sol_time / param.num_run
                m_moment += data.soltime_time_mom / param.num_run
                auc_init += fval_auc_init
                auc_final += fval_auc
                time_final += sol_time
                # NOTE(review): row_new_0 is captured on the first run but
                # never used in this chunk -- confirm before removing.
                if run == 0:
                    row_new_0 = row_new
                """ write result file """
                # Append this run's row.
                with open(file_name, 'a', newline="") as fp:
                    a = csv.writer(fp, delimiter=',')
                    a.writerows(row_new)
                # After the last run, append the averaged row.
                if run == param.num_run - 1:
                    with open(file_name, 'a', newline="") as fp:
                        a = csv.writer(fp, delimiter=',')
                        row_new = [[
                            'average', m_accuracy_init, m_accuracy,
                            m_num_iters, m_sol_time, m_moment
                        ]]
                        a.writerows(row_new)
            # Per-dataset summary.
            print('this is ', param.name_data)
            print('initial auc is: {}'.format(float(auc_init) / param.num_run))
            print('final auc is: {}'.format(float(auc_final) / param.num_run))
            print('final solution time is: {}'.format(
                float(time_final) / param.num_run))
    # Tail of the ORTB bid generator (its header lies above this chunk).
    impr = pClickDF.shape[0]
    clicks = pClickDF['click'].sum()
    # NOTE(review): ctr is computed but not used in the visible code.
    ctr = clicks * 100 / impr
    # Bid formula per predicted CTR: sqrt(c/lambda * p + c^2) - c.
    for p in pCTRDF['pCTR']:
        bid = np.sqrt((c / lamda * p) + c**2) - c
        bids.append(bid)
    return bids


if __name__ == "__main__":
    # Input CSVs: validation impressions plus predicted click probabilities.
    validationset = "../we_data/validation.csv"
    pClickset = "./pClick.csv"
    pCTRset = "./pCTRval.csv"
    validationReader = dataReader(validationset)
    pClickReader = dataReader(pClickset)
    pCTRReader = dataReader(pCTRset)
    validationDF = validationReader.getDataFrame()
    pClickDF = pClickReader.getDataFrame()
    pCTRDF = pCTRReader.getDataFrame()
    # Attach the generated bids to the validation impressions.
    ortbsDF = pd.DataFrame()
    ortbsDF['bids'] = ortb_bid_generator()
    newValidationDF = pd.concat([validationDF, ortbsDF], axis=1)
    resultDF = pd.DataFrame(
        columns=['clicks', 'imps', 'spent', 'CTR', 'CPC', 'CPM'])
    # (This dict literal continues beyond this chunk.)
    lists = {
        'clicks': [],
        'imps': [],
import numpy as np
from scipy.stats.stats import pearsonr
from scipy.stats.stats import spearmanr
from scipy.stats.stats import linregress
import matplotlib.pyplot as plt
import dataReader as read
import time

#Written to do a few informal looks at the data. Checks for correlation between feature of touch sensor data and reinforcements.

reader = read.dataReader("rerunDB")

robotType = "simple"
command = "jump"
dataset = reader.retrieveSensorData(robotType=robotType,
                                    command=command,
                                    minTimesteps=3000)

# Indices into the dataset tuple.
SENSOR_DATA = 0
REWARD_SIGNALS = 1

# Occurrence counts of the -1 and +1 reward signals.
print(dataset[1].count(-1))
print(dataset[1].count(1))

# Per robot: mark each timestep grounded (1) when any sensor reads 1.
# (The per-robot mean is appended beyond this chunk.)
meanGroundedTime = []
for robot in dataset[0]:
    robotGroundedTimesteps = []
    for row in robot:
        rowGrounded = 1 if 1 in row else 0
        robotGroundedTimesteps.append(rowGrounded)