def barplot_rating_dist(item, single=False, group=None, savefig=None):
    with msg("plotting rating distribution"):
        ratings = Data.get_ratings()[:, item]
        nyms = Data.get_nyms()
        plt.xlabel('rating')
        plt.ylabel('no. ratings')
        step = 1
        bins = np.arange(step/2, 5 + 1.5*step, step)
        hist = lambda d, **kwargs: plt.hist(d, bins=bins, rwidth=step*0.75, **kwargs)
        if group is not None:
            plt.title(f'Item {item}, group {group} rating distribution')
            hist(ratings[nyms[group]].data)
        elif single:
            plt.title(f'Item {item} rating distribution')
            hist(ratings.data)
        else:
            plt.title(f'Item {item}, all groups rating distributions')
            for nym_n, nym in enumerate(nyms):
                hist(ratings[nym].data, histtype='step', linewidth=2, label=f'group {nym_n}')
            plt.legend()
        if savefig is None:
            plt.show()
        else:
            with msg(f'Saving figure to "{savefig}"'):
                plt.savefig(savefig, dpi=150)
            plt.clf()
def __init__(self, debug_mode=0):
    Tk.__init__(self)
    self.engine = None
    self.language = None
    self.width = 0
    self.height = 0
    self.resolution_code = None
    self.is_full_screen = IntVar()
    self.screen_ratio = None
    self.resolution_list = []
    self.debug_mode = debug_mode
    if self.debug_mode:
        basicConfig(level=DEBUG)
        pil_logger = getLogger("PIL.PngImagePlugin")
        pil_logger.level = WARNING
    self.data_reader = DataReader(self)
    self._process_config()
    self.card_texts = {}
    self.ui_text_variables = {}
    self._load_text_variables()
    self.save_handler = SaveHandler(self)
    self.is_game_setup_in_progress = IntVar(value=0)
    self.is_game_in_progress = IntVar(value=0)
    self.is_turn_in_progress = IntVar(value=1)
    self._render_panes()
    self.is_game_in_progress.trace('w', self._follow_game_progress_change)
    self.is_turn_in_progress.trace('w', self._follow_turn_progress_change)
    self.players = {}
    self._text_placer()
    self.protocol("WM_DELETE_WINDOW", self.shutdown_ttk_repeat_fix)
    self.exit_in_progress = False
def read_input_data(file_name):
    dr = DataReader(file_name)
    texts, scores = dr.read_data()
    tk = Tokenizer()
    tk.fit_on_texts(texts)
    x = tk.texts_to_matrix(texts, mode='tfidf')
    x = utils.matrix_to_input(x)
    y = utils.scores_to_categorical(scores)
    return x, y
def process_text_data(file_path, vocab_size):
    """
    Preprocess the text data used to train the model. The following steps
    are performed:

    * Create a word array from the file we have received. For example, if
      our text is:

          'I want to learn wordvec to do cool stuff'

      it will produce the following array:

          ['I', 'want', 'to', 'learn', 'wordvec', 'to', 'do', 'cool', 'stuff']

    * Create the frequency count for every word in our array:

          [('I', 1), ('want', 1), ('to', 2), ('learn', 1), ('wordvec', 1),
           ('do', 1), ('cool', 1), ('stuff', 1)]

    * Using the count array, choose as our vocabulary the words with the
      highest counts. The number of words kept is given by vocab_size.

    * After that, create a dictionary mapping each word to an index and
      another mapping each index back to its word:

          index2word: {0: 'I', 1: 'want', 2: 'to', 3: 'learn', 4: 'wordvec',
                       5: 'do', 6: 'cool', 7: 'stuff'}
          word2index: {'I': 0, 'want': 1, 'to': 2, 'learn': 3, 'wordvec': 4,
                       'do': 5, 'cool': 6, 'stuff': 7}

      Both dictionaries are based on the words provided by the count array.

    * Finally, transform the word array into a number array using the
      word2index dictionary. Therefore, our word array:

          ['I', 'want', 'to', 'learn', 'wordvec', 'to', 'do', 'cool', 'stuff']

      will be translated to:

          [0, 1, 2, 3, 4, 2, 5, 6, 7]

    If a word is not present in the word2index dictionary, it is treated as
    an unknown word. Every unknown word is mapped to the same index.
    """
    my_data = DataReader(file_path)
    my_data.process_data(vocab_size)
    return my_data
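# A minimal sketch of the mapping described in the docstring above, assuming a
# plain-Python pipeline. `build_vocab` and the UNK-at-index-0 convention are
# illustrative assumptions, not part of the original DataReader.
from collections import Counter

def build_vocab(words, vocab_size):
    # Keep the (vocab_size - 1) most frequent words; index 0 is reserved
    # for the shared "unknown word" slot.
    word2index = {'UNK': 0}
    for word, _ in Counter(words).most_common(vocab_size - 1):
        word2index[word] = len(word2index)
    index2word = {i: w for w, i in word2index.items()}
    numbers = [word2index.get(w, 0) for w in words]
    return word2index, index2word, numbers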
def correlations():
    if request.method == 'POST':
        request_json = request.get_json()
        user_id = request_json['userId']
        x_label = request_json['xAxis']
        y_label = request_json['yAxis']
        next_day = request_json['nextDay']
        cr = DataReader()
        response = make_response(
            json.dumps(
                cr.read_correlation_data(user_id, x_label, y_label, bool(next_day))))
        response.headers['Content-Type'] = 'application/json'
        return response
def show_measurement():
    if request.method == 'POST':
        request_json = request.get_json()
        user_id = request_json['userId']
        measurement_type = request_json['type']  # renamed to avoid shadowing the built-in `type`
        start_date = request_json['beginDate']
        end_date = request_json['endDate']
        r = DataReader()
        start = datetime.strptime(start_date, '%d.%m.%Y')
        end = datetime.strptime(end_date, '%d.%m.%Y')
        response = make_response(
            json.dumps(r.heart_rate_special(user_id, start, end)))
        response.headers['Content-Type'] = 'application/json; charset=utf-8'
        return response
def test(self, test_info, path_to_model):
    """Test a trained model, given the test metadata and the path to the saved weights."""
    # 1. Load trained model and set it to eval mode
    model = ModelCT()
    model.load_state_dict(torch.load(path_to_model))
    model.eval()
    model.cpu()

    # 2. Create dataloader
    test_datareader = DataReader(self.main_path_to_data, test_info)
    test_generator = DataLoader(test_datareader, batch_size=10,
                                shuffle=False, pin_memory=True, num_workers=2)

    # 3. Calculate metrics
    predictions = []
    trues = []
    for item_test in test_generator:
        prediction = model.predict(item_test, is_prob=True)
        predictions.append(np.mean(prediction.cpu().numpy()))
        trues.append(item_test[1].numpy()[0])

    auc = roc_auc_score(trues, predictions)
    fpr, tpr, thresholds = roc_curve(trues, predictions, pos_label=1)
    return auc, fpr, tpr, thresholds, trues, predictions
class MyWindow(Gtk.ApplicationWindow):
    datareader = DataReader()

    def __init__(self, app):
        Gtk.Window.__init__(self, application=app)
        self.set_default_size(800, 600)
        self.builder = Gtk.Builder()
        self.builder.add_from_file("main.glade")

        tesla = self.datareader.get_stock_data("TSLA")
        chart1 = PriceChart(tesla)
        priceChartBox = self.builder.get_object("PriceChart")
        priceChartBox.add(chart1.canvas)

        apple = self.datareader.get_stock_data("AAPL")
        chart2 = PriceChart(apple)
        priceChartBox = self.builder.get_object("PriceChart2")
        priceChartBox.add(chart2.canvas)

        window = self.builder.get_object("MainWindow")
        window.show_all()
def total_rmse():
    group_count = DataReader.nym_count()
    item_count = R.shape[1]
    total_rmse = 0
    item_lam = lam.sum(axis=0)
    highest_n = 500
    large_items = np.argpartition(item_lam, -highest_n)[-highest_n:]

    with msg('Splitting group ratings'):
        group_ratings = []
        for group in range(group_count):
            group_ratings.append(R[P[group]])

    with msg('Getting rmse(s)'):
        count = 0
        for nth_item, item in enumerate(large_items):
            for group in range(group_count):
                mean = Rtilde[group, item]
                # placeholder filter; alternative mean-based filters kept for reference:
                # if 2.5 < mean < 3.5:
                # if mean > 4:
                if True:
                    count += 1
                    data = group_ratings[group][:, item].data
                    var = Rvar[group, item]
                    if var == 0:
                        var = 0.01
                    total_rmse += get_rmse(data, mean, var)
            if nth_item % 10 == 0:
                mean_rmse = total_rmse / count
                print(f'[{nth_item}, {count}] Mean RMSE: {mean_rmse}')
def plot_nym_stat(thresh=thresh_default, inv=False, savefig=False, outfile=outfile_default,
                  begin=None, num=None, stat_option=stat_option_default):
    stat_name = stat_options[stat_option]
    if inv:
        stat_name = f'inverse {stat_name}'

    fig, ax = plt.subplots()
    ax.set(
        # ylim=(0, None),
        title=f'{stat_name} of each group by item number (thresh no. ratings >= {thresh})',
        xlabel='item number',
        ylabel=stat_name)

    cm = plt.get_cmap('gist_rainbow')
    colors = [cm(1.*i/Data.nym_count()) for i in range(Data.nym_count())]

    begin = 0 if begin is None else begin
    end = None if num is None else begin + num
    nym_stats = Data.get_nym_stats()[:, begin:end, :]

    for nym_n in range(Data.nym_count()):
        nym_n_stats = nym_stats[nym_n]
        with msg(f'plotting nym #{nym_n} {stat_name}'):
            valids = (nym_n_stats[:, 3] >= thresh)
            print(f'{valids.sum()} of {len(valids)} valid (thresh = {thresh})')
            x = nym_n_stats[:, 0][valids]
            # use == rather than `is` for integer comparison
            if stat_option == 1:
                y = nym_n_stats[:, 1][valids]
            elif stat_option == 2:
                y = nym_n_stats[:, 2][valids]
            elif stat_option == 3:
                y = np.sqrt(nym_n_stats[:, 2][valids])
            if inv:
                y[y > 0] = 1 / y[y > 0]
            s = np.sqrt(nym_n_stats[:, 3][valids])
            ax.scatter(x, y, s=s, facecolors='none', edgecolors=colors[nym_n],
                       label=f'group {nym_n}')
    ax.legend()

    if savefig:
        with msg('Saving "{}" to "{}"'.format(ax.title.get_text(), outfile)):
            ax.get_figure().savefig(outfile, dpi=150)
        plt.clf()
    else:
        plt.show()
def sleep_data():
    if request.method == 'POST':
        request_json = request.get_json()
        user_id = request_json['userId']
        start_date = request_json['beginDate']
        end_date = request_json['endDate']
        gaussian_settings = request_json['gaussianSettings']
        r = DataReader()
        start = datetime.strptime(start_date, '%d.%m.%Y')
        end = datetime.strptime(end_date, '%d.%m.%Y')
        if gaussian_settings:
            sleep_data = r.read_sleep_data(user_id, start, end)
            average_list = []
            var_list = []
            for data in sleep_data:
                average_list.append(data['x'])
                var_list.append(data['y'])
            if len(average_list) > 1 and len(var_list) > 1:
                mean_duration = mean(average_list)
                variance_duration = variance(average_list)
                response = make_response(
                    json.dumps([{
                        'user_id': user_id,
                        'avg': mean_duration,
                        'std': math.sqrt(variance_duration)
                    }]))
                response.headers['Content-Type'] = 'application/json; charset=utf-8'
                return response
            else:
                response = make_response(
                    json.dumps([{
                        'user_id': user_id,
                        'avg': -1000,
                        'std': 1
                    }]))
                response.headers['Content-Type'] = 'application/json; charset=utf-8'
                return response
        else:
            response = make_response(
                json.dumps(r.read_sleep_data(user_id, start, end)))
            response.headers['Content-Type'] = 'application/json; charset=utf-8'
            return response
def test_run_training(self):
    """
    Test that run_training trains the SkipGram model within the
    expected time and loss bounds.
    """
    my_data = DataReader(get_path_basic_corpus())
    my_vocab_size = 500
    my_data.process_data(my_vocab_size)
    my_config = wv.Config(num_steps=200,
                          vocab_size=my_vocab_size,
                          show_step=2)
    my_model = wv.SkipGramModel(my_config)
    duration, loss = wv.run_training(my_model,
                                     my_data,
                                     verbose=False,
                                     visualization=False,
                                     debug=True)
    self.assertTrue(duration <= 1.7)
    self.assertTrue(loss < 7)
def heatmap_rating_dist(item):
    with msg("plotting rating distribution"):
        ratings = Data.get_ratings()[:, item]
        nyms = Data.get_nyms()
        data = np.zeros((10, len(nyms)))
        for nym_n, nym in enumerate(nyms):
            unique, counts = np.unique(ratings[nym].data, return_counts=True)
            for rating, count in zip(unique, counts):
                # ratings run 0.5..5.0 in half-star steps, so 2*rating - 1 maps them to rows 0..9
                data[int(2*rating - 1), nym_n] = count
        ax = sns.heatmap(data)
        ax.set(
            title="Distribution of item #{} ratings by group".format(int(item)),
            xlabel="group number",
            ylabel="rating",
            yticklabels=np.linspace(0.5, 5, 10))
        plt.show()
def main():
    config = Config(CONFIG_FILE)

    # Retrieve data
    data = DataReader(config)
    train_data = data.getVOC07TrainData(shuffle=config.shuffle)
    test_data = data.getVOC07TestData()

    # TODO: Preprocess/format data for training
    X_train = train_data
    y_train = train_data
    X_test = test_data

    # Train model (single instantiation; the original created the model twice)
    model = SsdModel(config.n_classes)
    model.train(X_train, y_train)
    model.test(X_test)

    showSampleData(train_data)
def main(start):
    file_count = len(CATEGORY[start:]) * len(YEAR) * len(OUTLET)
    pbar = ProgressBar(max_value=file_count, redirect_stdout=True)
    progress = INITIAL_PROGRESS
    for cat in CATEGORY[start:]:
        # instance of DataReader that will retain all necessary data
        # for categorical manipulation
        by_category = DataReader()
        for time in YEAR:
            by_category.create_reference(cat, time)
            by_category.create_zeroes(cat, time)
            for store in OUTLET:
                # import relevant data for the given category, year, and store
                by_category.store_data(cat, time, store)
                # append individual occ_data to by_category.occ_list
                occurrence(by_category)
                # update by_category.sales_data with sales data
                sales(by_category)
                # update by_category.unitp_data with units data
                units(by_category)
                # append total purchase data (panel) to by_category.panel_list
                panels(by_category)
                # at the very end, update progress bar
                pbar, progress = update_pbar(pbar, progress)
            # append completed sales data to by_category.sales_list
            by_category.sales_list.append(by_category.sales_data)
            # append completed units data to by_category.units_list
            by_category.units_list.append(by_category.units_data)
        # concat all DataFrames and export the final product
        final_product(by_category, cat)
    return SUCCESS_CODE
def rawSearch(self):
    inputpath = self.fileForProcessing
    filerc = io.FileIO(inputpath)
    magic = filerc.read(4)
    filerc.close()

    self.preparedLicumsForGraph = defaultdict(list)
    self.framesCount = len(self.indexArray) - 2
    self.setAngle()
    self.setFocus()
    calc.setMatrixType(self.deviceTypeList.currentIndex())
    calc.getHalfOfMaxAngle()

    if magic == b'\x073"\x11':
        # filerc = io.FileIO(inputpath)
        # frameLen = self.indexArray[1] - self.indexArray[0]
        # rcFrame = filerc.read(frameLen)
        # startMarker = rcFrame.find(b'\xff\xd8')
        # self.getImageSize(rcFrame[startMarker:])
        filerc = io.FileIO(inputpath)
        for ind in range(0, self.framesCount):
            # open the rc file and read it through an IO container
            # (region start and length)
            frameLen = self.indexArray[ind + 1] - self.indexArray[ind]
            rcFrame = filerc.read(frameLen)
            startMarker = rcFrame.find(b'\xff\xd8')
            radarData = rcFrame[:startMarker]
            licumsList = datareader.getRawData(radarData, ind)
            for target in licumsList.keys():
                xCoord, yCoord = calc.getLicumCoordsInMetersAlterN(
                    licumsList[target][5], licumsList[target][6],
                    licumsList[target][0], ind)
                if xCoord is None:
                    continue
                self.preparedLicumsForGraph[ind] = self.appendLists(
                    self.preparedLicumsForGraph[ind], 3)
                self.preparedLicumsForGraph[ind][0].append(licumsList[target][0])
                self.preparedLicumsForGraph[ind][1].append(xCoord)
                self.preparedLicumsForGraph[ind][2].append(yCoord)
        self.tryFindAngle()
        # self.delay = self.framesCount
        calc.cam_angle = 0
        self.delay = 500
        self.drawGraph()
def __init__(self): self.reader = DataReader("yeast.data", 1, 1, " ") self.pts, self.mini, self.maxi, self.dimens, self.classes = self.reader.getPoints() self.points = [] for pt, c in zip(self.pts, self.classes): point = Point() point.position = pt point.classe = c self.points.append(point) self.i = 0 self.max_iterations = 1 self.clusters = [] self.possible_classes = self.get_possible_classes()
def test_read_text(self):
    """
    Test to check that the read_text function returns a list of words
    given a txt file.
    """
    dr1 = DataReader()
    dr2 = DataReader(punctuation=True)
    words1 = dr1.read_text()
    words2 = dr2.read_text()
    print("\nReading time = {}\n".format(get_time(dr1.read_text)))
    self.assertTrue(len(words1) > 0)
    self.assertTrue(len(words2) > 0)
    self.assertEqual(words1[22], "System")
    self.assertEqual(words2[22], "System.")
def __init__(self, path, nvar, iteration=1500, lr=0.01):
    """
    :param path: path to the data file
    :param nvar: number of variables (features)
    :param iteration: number of iterations
    :param lr: learning rate
    """
    data = DataReader.read(path, nvar + 1)  # nvar + y
    self.y = np.array([data[-1]]).transpose()
    self.x = np.array([np.ones((len(self.y),))]
                      + [np.array(data[i]) for i in range(nvar)]).transpose()
    self.theta = np.zeros((nvar + 1, 1))
    self.iteration = iteration
    self.lr = lr
    self.nvar = nvar
    self.mu = self.x.mean(0)
    self.s = self.x.max(0) - self.x.min(0)
    self.mu[0] = 0
    self.s[0] = 1  # for x_0: (1 - 0) / 1 = 1
    self.feature_normed = False
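# The mu/s pair above sets up mean normalization with range scaling, i.e.
# (x - mu) / s per column. A hypothetical helper showing how it would be
# applied; the method name is an assumption, only mu, s, and feature_normed
# appear in the original snippet:
def normalize_features(self):
    if not self.feature_normed:
        # column 0 is the bias column; mu[0] = 0 and s[0] = 1 leave it as all ones
        self.x = (self.x - self.mu) / self.s
        self.feature_normed = True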
def read_data(data_path, word_word2index, char_word2index, label_word2index,
              label_type, label_bucket, max_size=None, normalize_digits=True,
              use_lm=False, use_elmo=False):
    _buckets = label_bucket[label_type]
    max_length = 0
    data = [[] for _ in _buckets]
    max_char_length = [0 for _ in _buckets]
    print('Reading data from %s' % data_path)
    counter = 0
    reader = DataReader(data_path, word_word2index, char_word2index,
                        label_word2index, use_elmo)
    inst = reader.get_next(normalize_digits)
    while inst is not None and (not max_size or counter < max_size):
        max_length = max(max_length, inst[6])
        counter += 1
        if counter % 10000 == 0:
            print("reading data: %d" % counter)
        inst_size = len(inst[0])
        for bucket_id, bucket_size in enumerate(_buckets):
            if inst_size < bucket_size:
                if use_elmo:
                    words = inst[0]
                else:
                    words = inst[1]
                if use_lm:
                    data[bucket_id].append(
                        [words, inst[3], inst[5], inst[7], inst[8]])
                else:
                    data[bucket_id].append([words, inst[3], inst[5]])
                max_len = max([len(char_seq) for char_seq in inst[2]])
                if max_char_length[bucket_id] < max_len:
                    max_char_length[bucket_id] = max_len
                break
        inst = reader.get_next(normalize_digits)
    reader.close()
    print("Total number of data: %d" % counter)
    print("Max length: %d" % max_length)
    return data, max_char_length
# training hyperparameters
learning_rate = 1e-6  # learning rate
num_epochs = 10  # number of epochs

input_channels = 1  # input channels
input_height = 28  # image height
input_width = 28  # image width
num_classes = 6  # number of image classes

# image size, number of image classes
one_layer_net = OneLayerNet(input_height * input_width, num_classes)

# paths to the directories with images
train_dir = "data/train"
test_dir = "data/test"

# the training and test sets are produced by DataReader as generators
train_generator = DataReader(train_dir, [input_height, input_width], True,
                             input_channels, num_classes).get_generator()
test_generator = DataReader(test_dir, [input_height, input_width], False,
                            input_channels, num_classes).get_generator()

print('Size of training set: {}'.format(train_generator.get_data_size()))
print('Size of testing set: {}'.format(test_generator.get_data_size()))

print("{} Start training...".format(datetime.now()))  # training start time

# for every epoch, accumulate the recognition loss during training
for epoch in range(num_epochs):
    print("{} Epoch number: {}".format(datetime.now(), epoch + 1))
    loss = 0
class SimpleHiEnv(BaseHiEnv):
    def __init__(self,
                 dataset_name: str,
                 field: str,  # TODO
                 rewards: Dict[str, int],
                 ):
        print(f'<SimpleHiEnv>: data set:{dataset_name},field:{field}.')  # TODO!
        self.field = field  # necessary?

        # init DataReader, and then init sets
        self.reader = DataReader(dataset_name, field)
        self.seeds = self.reader.get_original_seeds()
        self.gt = self.reader.get_gt_set()
        self.current_entity_set = self.seeds.copy()
        self.if_continue = True
        self.candidate_list = []

        # init CGExpan
        device = torch.device("cuda:0")
        self.cgexpan = CGExpan(device, self.reader)  # TODO

        self.rewards = rewards
        print('<SimpleHiEnv>: Env is ready!')

    def state(self) -> Tuple[List[Entity], List[Entity], str]:
        return self.current_entity_set, self.candidate_list, self.field

    def if_stop(self):
        return self.if_continue

    def action_expand(self, keys: List[Entity]) -> int:
        expanded = self.cgexpan.expand(keys)
        expanded = unique_by(expanded, lambda c: c.eid)
        # TODO: shuffle
        # if self.sort_candidates:
        #     self._sort_candidates_by_distance_to_keys(l, keys)
        # else:
        #     random.shuffle(l)
        self.candidate_list = expanded[0:40]
        if len(self.candidate_list) < 3:
            self.if_continue = False
        for candidate in self.candidate_list:
            candidate.ground_truth = (candidate in self.gt) or (candidate in self.seeds)
        return 0

    def action_judge(self, answers: List[bool]) -> List[int]:  # TODO!
        results = []
        for candidate, answer in zip(self.candidate_list, answers):  # TODO:
            if answer and (candidate not in self.current_entity_set):
                self.current_entity_set.append(candidate)
            if candidate.ground_truth == answer:
                results.append(self.rewards["correct"])
            else:
                results.append(self.rewards["wrong"])
        print("Now current entity set has:", len(self.current_entity_set))
        return results
def __init__(
    self,
    device,
    reader: DataReader,
    k=5,
    gen_thres=3,
    model_name='bert-base-uncased',
):
    self.tokenizer = BertTokenizer.from_pretrained(TOKEN_PATH, do_lower_case=False)
    self.maskedLM = BertForMaskedLM.from_pretrained(
        BERT_PATH, output_hidden_states=True)
    self.maskedLM.to(device)
    self.maskedLM.eval()

    self.k = k  # TODO
    self.gen_thres = gen_thres  # TODO

    self.reader = reader
    self.eid2name = reader.get_eid2name()
    self.keywords = reader.get_keywords()
    self.eid2idx = reader.get_eid2idx()  # TODO
    self.entity_pos = reader.get_entity_pos()  # TODO
    self.pretrained_emb = reader.get_pretrained_emb()  # TODO
    self.means = np.array(
        [np.mean(emb, axis=0) for emb in self.get_emb_iter()])

    self.inflect = inflect.engine()

    mask_token = self.tokenizer.mask_token
    self.generation_templates = [
        [mask_token, ' such as {} , {} , and {} .', 1],
        ['such ' + mask_token, ' as {} , {} , and {} .', 1],
        ['{} , {} , {} or other ' + mask_token, ' .', 0],
        ['{} , {} , {} and other ' + mask_token, ' .', 0],
        [mask_token, ' including {} , {} , and {} .', 1],
        [mask_token, ' , especially {} , {} , and {} .', 1],
    ]
    self.ranking_templates = [
        '{} such as ' + mask_token + ' .',
        'such {} as ' + mask_token + ' .',
        mask_token + ' or other {} .',
        mask_token + ' and other {} .',
        '{} including ' + mask_token + ' .',
        '{} especially ' + mask_token + ' .',
    ]
    self.expansion_templates = [
        ('', ' such as {} , {} , {} , and {} .'),
        ('such ', ' as {} , {} , {} , and {} .'),
        ('{} , {} , {} , {} or other ', ' .'),
        ('{} , {} , {} , {} and other ', ' .'),
        ('', ' including {} , {} , {} , and {} .'),
        ('', ' , especially {} , {} , {} , and {} .'),
    ]
    self.calculated_cname_rep = {}
    print('<CGExpan>: CGExpan is ready!')
class RegressionDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        sample = self.inputs[idx], self.labels[idx]
        return sample


if __name__ == '__main__':
    fname = "data/AEP_hourly.csv"
    datareader = DataReader(fname)
    X, Y = datareader.get_data()

    dataset = RegressionDataset(inputs=X, labels=Y)
    dataset_loader = DataLoader(dataset, batch_size=8, shuffle=False, num_workers=2)

    # loop variables renamed to avoid shadowing the built-in `input`
    for i, (batch_input, batch_label) in enumerate(dataset_loader):
        print(batch_input)
        print(batch_label)
        print()
        if i == 2:
            break
    print('Client connected')  # tail of the connect handler above


@socket_.on('disconnect_request', namespace='/biometrics')
def disconnect_request():
    @copy_current_request_context
    def can_disconnect():
        disconnect()
        print('Client disconnected')

    emit('my_response', {'data': 'Disconnected!'}, callback=can_disconnect)


if __name__ == '__main__':
    reader = DataReader()

    def send_data():
        while True:
            red, ir, hr, hr_v, spo2, spo2_v = reader.get_values()
            payload = {
                't': round(time()),
                'red': red,
                'ir': ir,
                'hr': hr,
                'hr_v': hr_v,
                'spo2': spo2,
                'spo2_v': spo2_v
            }
            socket_.emit('data', payload, namespace="/biometrics")
            socket_.sleep(0.1)
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from datareader import DataReader
import word2vec as wv
import util

file_path = os.path.join(parentdir, "data")
file_path = os.path.join(file_path, "Wiki.txt")
eval_path = os.path.join(parentdir, "evaluation")
eval_path = os.path.join(eval_path, "questions-words-ptbr.txt")

my_data = DataReader(file_path)
my_data.get_data()
word2index = my_data.word2index
index2word = my_data.index2word

BATCH_SIZE = np.array(range(1, 17)) * 10
number_of_exp = len(BATCH_SIZE)
results = []
info = []

for i, bs in enumerate(BATCH_SIZE):
    print("\n ({0} of {1})".format(i + 1, number_of_exp))
    config = wv.Config(batch_size=bs)
    attrs = vars(config)
    config_info = ["%s: %s" % item for item in attrs.items()]
    info.append(config_info)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
valid_summary = tf.Summary()

# Initialize the FileWriters
writer_1 = tf.summary.FileWriter(filewriter_path + 'train')
writer_2 = tf.summary.FileWriter(filewriter_path + 'validation')

# Initialize the saver for model checkpoints
saver = tf.train.Saver()

train_dir = '../data/train'
test_dir = '../data/test'

train_generator = DataReader(train_dir, [input_height, input_width], True,
                             input_channels, num_classes).get_generator()
test_generator = DataReader(test_dir, [input_height, input_width], False,
                            input_channels, num_classes).get_generator()

print('Size of training set: {}'.format(train_generator.get_data_size()))
print('Size of testing set: {}'.format(test_generator.get_data_size()))

train_patterns_per_epoch = train_generator.get_data_size()

# start a session
with tf.Session() as sess:
    # initialize all variables
    sess.run(tf.global_variables_initializer())

    # add the model graph to TensorBoard
    writer_1.add_graph(sess.graph)
def test_run():
    '''Function to test all the utilities.'''
    # Define a date range
    dates = pd.date_range('2015-04-02', '2016-04-01')

    # Choose feature symbols to read
    location = os.path.join(base_dir, "BitcoinData")
    symbols = os.listdir(location)

    # build dataframe consisting of all features
    dfreader = DataReader()
    util = Utility()
    location = os.path.join(base_dir, "BitcoinData")
    df = dfreader.get_data(location, symbols, dates)
    df = util.normalize_data(df)

    for index in range(len(symbols)):
        symbols[index] = symbols[index].strip('.csv')

    plotter = DataPlotting()
    # plot dataframe in selected range and given features list
    plotter.plot_selected(df, symbols, '2015-05-01', '2015-06-01')
    # plot dataframe for all given data
    plotter.plot_data(df, "Bitcoin")

    dates = pd.date_range('2010-01-01', '2016-01-01')
    btc_file = "bitcoin-market-price.csv"
    location = os.path.join(base_dir, btc_file)
    df_btc = dfreader.get_btc(location, btc_file, dates)

    stats = Statistics(df)
    rmean = stats.get_rolling_mean(df_btc['bitcoin-market-price'], window=20)
    rstd = stats.get_rolling_std(df_btc.loc[:, 'bitcoin-market-price'], window=20)  # .ix is deprecated
    upper_band, lower_band = stats.get_bollinger_bands(rmean, rstd)

    # Plot raw values, rolling mean and Bollinger Bands
    ax = df_btc['bitcoin-market-price'].plot(title="Bollinger Bands",
                                             label='bitcoin-market-price')
    rmean.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()

    # compute daily returns
    daily_returns = stats.compute_daily_returns(df_btc)
    plotter.plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")
    daily_returns.replace(to_replace=np.inf, value=np.NaN, inplace=True)

    # Plot a histogram
    daily_returns.hist(bins=21)

    # Get mean and standard deviation
    mean = daily_returns.mean()
    std = daily_returns.std()
    plt.axvline(mean[0], color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std[0], color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std[0], color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Scatterplots
    df.plot(kind='scatter', x='hash_rate', y='market_cap')
    beta_XOM, alpha_XOM = np.polyfit(df['hash_rate'], df['market_cap'], 1)  # fit poly degree 1
    # the fitted line is beta*x + alpha over x = hash_rate
    # (the original mistakenly plotted beta*market_cap + alpha)
    plt.plot(df['hash_rate'], beta_XOM * df['hash_rate'] + alpha_XOM, '-', color='r')
    plt.show()

    # Calculate correlation coefficient
    correlation = df['avg_block_size'].corr(df['n_tx'], method='pearson')
    print(correlation)
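# get_bollinger_bands comes from the Statistics helper used above; a sketch of
# the standard definition (rolling mean +/- 2 rolling standard deviations),
# assuming pandas Series inputs -- an illustration, not the project's code:
def get_bollinger_bands(rmean, rstd):
    upper_band = rmean + 2 * rstd
    lower_band = rmean - 2 * rstd
    return upper_band, lower_band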
@classmethod
def setUpClass(cls):
    cls.dr = DataReader()
    cls.words = cls.dr.read_text()
import numpy as np
import matplotlib.pyplot as plt

from myutils import msg
from datareader import DataReader
from dist_model import DiscreteNormal as DiscNorm

rating_count = 5
dist_gen = DiscNorm(np.linspace(0.5, 5.5, num=rating_count + 1))

with msg("Getting data"):
    Rtilde = DataReader.get_Rtilde()
    Rvar = DataReader.get_Rvar()
    R = DataReader.get_ratings()
    lam = DataReader.get_lam()
    P = DataReader.get_nyms()


def get_data_dist(data):
    # empirical distribution of the observed ratings over the 5 rating values
    ratings, counts = np.unique(data, return_counts=True)
    dist_data = np.zeros(rating_count)
    dist_data[ratings.astype(int) - 1] = counts / counts.sum()
    return dist_data


def get_err(data, mean, var):
    # ratio of empirical to model probability mass for each rating value
    dist_data = get_data_dist(data)
    dist_model = dist_gen.pmf(mean, var)
    return abs(dist_data / dist_model)
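# total_rmse (above) calls a get_rmse(data, mean, var) helper that is not
# shown in these snippets. Following the same distribution-comparison scheme
# as get_err, one plausible sketch (an assumption, not the original code):
def get_rmse(data, mean, var):
    dist_data = get_data_dist(data)
    dist_model = dist_gen.pmf(mean, var)
    return np.sqrt(np.mean((dist_data - dist_model) ** 2))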
from datareader import DataReader
from datawriter import DataWriter
from femgrid import FemGrid
from globaldata import GlobalData
from result import Result
from plotter import Plotter

rMin, alfaAir, tempBegin, tempAir, tauMax, nTime = DataReader.readGlobalData("global_data")
ne, nh, rMax, elements, nodes = DataReader.readElementsData("elements_data", rMin, tempBegin)

globalData = GlobalData(ne, nh, rMin, rMax, alfaAir, tempBegin, tempAir, tauMax, nTime)
globalData.printGlobalData()

femGrid = FemGrid(elements, nodes)
result = femGrid.simulateProcess(globalData)
result.printTemperatures()

DataWriter.writeData("result.txt", result.getTemperatures())
# axis labels: "czas" = time, "temperatura" = temperature
Plotter.plot("czas", "temperatura", femGrid.getTauArray(), result.getTemperatures())
class KMeans:
    def __init__(self):
        self.reader = DataReader("yeast.data", 1, 1, " ")
        self.pts, self.mini, self.maxi, self.dimens, self.classes = self.reader.getPoints()
        self.points = []
        for pt, c in zip(self.pts, self.classes):
            point = Point()
            point.position = pt
            point.classe = c
            self.points.append(point)
        self.i = 0
        self.max_iterations = 1
        self.clusters = []
        self.possible_classes = self.get_possible_classes()

    def randomPoint(self):
        point = Point()
        for mi, ma in zip(self.mini, self.maxi):
            point.position.append(uniform(mi, ma))
        return point

    def randomClusters(self, max_clusters):
        for i in range(0, max_clusters):
            cluster = Cluster()
            cluster.centroid = self.randomPoint()
            self.clusters.append(cluster)

    def distanceBetween(self, p1, p2):
        # squared distance: monotone in the real distance, so it suffices
        # for nearest-centroid comparisons and avoids the sqrt
        distance = 0
        for d1, d2 in zip(p1, p2):
            distance += (d2 - d1) ** 2
        return distance

    def realDistanceBetween(self, p1, p2):
        distance = 0
        for d1, d2 in zip(p1, p2):
            distance += (d2 - d1) ** 2
        return sqrt(distance)

    def putPointInClosestCluster(self, point):
        min_distance = self.distanceBetween(point.position, self.clusters[0].centroid.position)
        cur_cluster = self.clusters[0]
        for cluster in self.clusters:
            distance = self.distanceBetween(point.position, cluster.centroid.position)
            if distance < min_distance:
                cur_cluster = cluster
                min_distance = distance
        cur_cluster.points.append(point)

    def assignPointsToClusters(self):
        for point in self.points:
            self.putPointInClosestCluster(point)

    def recalculateCentroids(self):
        for cluster in self.clusters:
            cluster.updateCentroid()

    def clearClusters(self):
        for cluster in self.clusters:
            cluster.points = []

    def clusterToX(self, cluster):
        return [p.position[0] for p in cluster.points]

    def clusterToY(self, cluster):
        return [p.position[1] for p in cluster.points]

    def clusterToZ(self, cluster):
        return [p.position[2] for p in cluster.points]

    def printClusters(self):
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        # one color per cluster; execute() caps the cluster count at 5,
        # replacing the original's five duplicated if-blocks
        colors = ['b', 'r', 'g', 'y', 'c']
        for i, cluster in enumerate(self.clusters):
            color = colors[i % len(colors)]
            ax.scatter(self.clusterToX(cluster), self.clusterToY(cluster),
                       self.clusterToZ(cluster), zdir=u'z', s=20, c=color, marker='o')
            ax.scatter([cluster.centroid.position[0]],
                       [cluster.centroid.position[1]],
                       [cluster.centroid.position[2]],
                       zdir=u'z', s=50, c=color, marker='s')
        plt.show()
    def get_possible_classes(self):
        classes = []
        for p in self.points:
            if classes.count(p.classe) == 0:
                # print(p.classe)
                classes.append(p.classe)
        return classes

    def rand_index(self):
        cooccurrence_matrix = []
        for possible_class in self.possible_classes:
            matrix_line = []
            for cluster in self.clusters:
                array_of_classes = cluster.to_array_of_classes()
                matrix_line.append(array_of_classes.count(possible_class))
            cooccurrence_matrix.append(matrix_line)

        import numpy as np
        array_matrix = np.array(cooccurrence_matrix)
        tp, fp, tn, fn = get_tp_fp_tn_fn(array_matrix)

        rand_index = float(tp + tn) / (tp + fp + fn + tn)
        precision = float(tp) / (tp + fp)
        recall = float(tp) / (tp + fn)
        return rand_index

    # execution
    def execute(self, args):
        # First argument is the number of clusters;
        # all the others are centroid coordinates.
        # Example: (3, x1, y1, z1, x2, y2, z2, x3, y3, z3)
        self.clusters = []
        n_clusters = int(args[0])
        if n_clusters < 1:
            n_clusters = 1
        if n_clusters > 5:
            n_clusters = 5
        for i in range(0, n_clusters):
            cluster = Cluster()
            cluster.centroid.position = args[(i * self.dimens) + 1:
                                             (i * self.dimens) + self.dimens + 1]
            self.clusters.append(cluster)

        i = 0
        while i < self.max_iterations:
            self.clearClusters()
            self.assignPointsToClusters()
            # self.recalculateCentroids()
            i += 1

        rand_index = self.rand_index()
        return rand_index
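# rand_index relies on a get_tp_fp_tn_fn helper that is not included in this
# snippet. A minimal pair-counting sketch over the class-by-cluster
# contingency matrix (an assumption about its behavior, not the original):
from scipy.special import comb

def get_tp_fp_tn_fn(cooccurrence_matrix):
    tp_plus_fp = comb(cooccurrence_matrix.sum(axis=0), 2).sum()  # pairs sharing a cluster
    tp_plus_fn = comb(cooccurrence_matrix.sum(axis=1), 2).sum()  # pairs sharing a class
    tp = comb(cooccurrence_matrix, 2).sum()                      # pairs sharing both
    fp = tp_plus_fp - tp
    fn = tp_plus_fn - tp
    tn = comb(cooccurrence_matrix.sum(), 2) - tp - fp - fn
    return tp, fp, tn, fn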