Example #1
def barplot_rating_dist(item, single=False, group=None, savefig=None):

	with msg("plotting rating distribution"):
		ratings = Data.get_ratings()[:,item]
		nyms = Data.get_nyms()

		plt.xlabel('rating')
		plt.ylabel('no. ratings')
		step = 1
		bins = np.arange(step/2, 5 + 1.5*step, step)
		hist = lambda d, **kwargs: plt.hist(d, bins=bins, rwidth=step*0.75, **kwargs)
		if group is not None: 
			plt.title(f'Item {item}, group {group} rating distribution')
			hist(ratings[nyms[group]].data)
		elif single: 
			plt.title(f'Item {item} rating distribution')
			hist(ratings.data)
		else:
			plt.title(f'Item {item}, all groups rating distributions')
			for nym_n, nym in enumerate(nyms):
				hist(ratings[nym].data, histtype='step', linewidth=2 ,label=f'group {nym_n}')
			plt.legend()
		if savefig is None:
			plt.show()
		else:
			with msg(f'Saving figure to "{savefig}"'):
				plt.savefig(savefig, dpi=150)
			plt.clf()
Example #2
 def __init__(self, debug_mode=0):
     Tk.__init__(self)
     self.engine = None
     self.language = None
     self.width = 0
     self.height = 0
     self.resolution_code = None
     self.is_full_screen = IntVar()
     self.screen_ratio = None
     self.resolution_list = []
     self.debug_mode = debug_mode
     if self.debug_mode:
         basicConfig(level=DEBUG)
         pil_logger = getLogger("PIL.PngImagePlugin")
         pil_logger.level = WARNING
     self.data_reader = DataReader(self)
     self._process_config()
     self.card_texts = {}
     self.ui_text_variables = {}
     self._load_text_variables()
     self.save_handler = SaveHandler(self)
     self.is_game_setup_in_progress = IntVar(value=0)
     self.is_game_in_progress = IntVar(value=0)
     self.is_turn_in_progress = IntVar(value=1)
     self._render_panes()
     self.is_game_in_progress.trace('w', self._follow_game_progress_change)
     self.is_turn_in_progress.trace('w', self._follow_turn_progress_change)
     self.players = {}
     self._text_placer()
     self.protocol("WM_DELETE_WINDOW", self.shutdown_ttk_repeat_fix)
     self.exit_in_progress = False
Example #3
def read_input_data(file_name):
    dr = DataReader(file_name)
    texts, scores = dr.read_data()
    tk = Tokenizer()
    tk.fit_on_texts(texts)
    x = tk.texts_to_matrix(texts, mode='tfidf')
    x = utils.matrix_to_input(x)
    y = utils.scores_to_categorical(scores)
    return x, y
Example #4
def process_text_data(file_path, vocab_size):
    """
    This function is responsible for preprocessing the text data we will use to
    train our model. It will perform the following steps:

    * Create an word array for the file we have received. For example, if our
      text is:

        'I want to learn wordvec to do cool stuff'

    It will produce the following array:

        ['I', 'want', 'to', 'learn', 'wordvec', 'to', 'do', 'cool', 'stuff']

    * Create the frequency count for every word in our array:

       [('I', 1), ('want', 1), ('to', 2), ('learn', 1), ('wordvec', 1),
        ('do', 1), ('cool', 1), ('stuff', 1)]

    * With the count array, we choose as our vocabulary the words with the
      highest count. The number of words will be decided by the variable
      vocab_size.

    * After that we will create a dictionary to map a word to an index and an
      index to a word:

      index2word: {0: 'I', 1: 'want', 2: 'to', 3: 'learn', 4: 'wordvec',
                   5: 'do', 6: 'cool', 7: 'stuff'}
      word2index: {'I': 0, 'want': 1, 'to': 2, 'learn': 3, 'wordvec': 4,
                   'do': 5, 'cool': 6, 'stuff': 7}

      Both of these dictionaries are based on the words provided by the count
      array.

    * Finally, we will transform the words array to a number array, using the
      word2vec dictionary.

      Therefore, our words array:

      ['I', 'want', 'to', 'learn', 'wordvec', 'to', 'do', 'cool', 'stuff']

      Will be translated to:

      [0, 1, 2, 3, 4, 2, 5, 6, 7]

      If a word is not present in the word2index array, it will be considered an
      unknown word. Every unknown word will be mapped to the same index.
    """
    my_data = DataReader(file_path)
    my_data.process_data(vocab_size)
    return my_data
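The preprocessing pipeline described in this docstring can be sketched directly; the helper below is an illustrative outline only (its name and the unknown-word handling are assumptions, not the actual DataReader.process_data implementation):

from collections import Counter

def build_vocab_and_encode(text, vocab_size):
    # 1. split the raw text into a word array
    words = text.split()
    # 2. count word frequencies and keep the vocab_size most frequent words
    kept = {word for word, _ in Counter(words).most_common(vocab_size)}
    # 3. map each kept word to an index (first-occurrence order) and back
    word2index = {}
    for word in words:
        if word in kept and word not in word2index:
            word2index[word] = len(word2index)
    index2word = {index: word for word, index in word2index.items()}
    # 4. translate the word array to indices; every out-of-vocabulary word
    #    shares the same "unknown" index
    unk_index = len(word2index)
    encoded = [word2index.get(word, unk_index) for word in words]
    return encoded, word2index, index2word

# build_vocab_and_encode('I want to learn wordvec to do cool stuff', 500)[0]
# -> [0, 1, 2, 3, 4, 2, 5, 6, 7]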
Example #5
def correlations():
    if request.method == 'POST':
        request_json = request.get_json()
        user_id = request_json['userId']
        x_label = request_json['xAxis']
        y_label = request_json['yAxis']
        next_day = request_json['nextDay']
        cr = DataReader()
        response = make_response(
            json.dumps(
                cr.read_correlation_data(user_id, x_label, y_label,
                                         bool(next_day))))
        response.headers['Content-Type'] = 'application/json'
        return response
Example #6
def show_measurement():
    if request.method == 'POST':
        request_json = request.get_json()
        user_id = request_json['userId']
        measurement_type = request_json['type']
        start_date = request_json['beginDate']
        end_date = request_json['endDate']
        r = DataReader()
        start = datetime.strptime(start_date, '%d.%m.%Y')
        end = datetime.strptime(end_date, '%d.%m.%Y')
        response = make_response(
            json.dumps(r.heart_rate_special(user_id, start, end)))
        response.headers['Content-Type'] = 'application/json; charset=utf-8'
        return response
Example #7
    def test(self, test_info, path_to_model):
        """Test given model with task, path to the model and model datareder names."""

        # 1. Load trained model and set it to eval mode
        Model = ModelCT()
        Model.load_state_dict(torch.load(path_to_model))
        Model.eval()
        Model.cpu()

        # 2. Create dataloader
        test_datareader = DataReader(self.main_path_to_data, test_info)
        test_generator = DataLoader(test_datareader,
                                    batch_size=10,
                                    shuffle=False,
                                    pin_memory=True,
                                    num_workers=2)

        # 3. Calculate metrics
        predictions = []
        trues = []

        for item_test in test_generator:
            prediction = Model.predict(item_test, is_prob=True)
            predictions.append(np.mean(prediction.cpu().numpy()))
            trues.append(item_test[1].numpy()[0])

        auc = roc_auc_score(trues, predictions)
        fpr, tpr, thresholds = roc_curve(trues, predictions, pos_label=1)
        return auc, fpr, tpr, thresholds, trues, predictions
Example #8
class MyWindow(Gtk.ApplicationWindow):

    datareader = DataReader()

    def __init__(self, app):
        Gtk.Window.__init__(self, application=app)
        self.set_default_size(800, 600)

        self.builder = Gtk.Builder()
        self.builder.add_from_file("main.glade")

        tesla = self.datareader.get_stock_data("TSLA")
        chart1 = PriceChart(tesla)

        priceChartBox = self.builder.get_object("PriceChart")
        priceChartBox.add(chart1.canvas)

        apple = self.datareader.get_stock_data("AAPL")
        chart2 = PriceChart(apple)

        priceChartBox = self.builder.get_object("PriceChart2")
        priceChartBox.add(chart2.canvas)

        window = self.builder.get_object("MainWindow")
        window.show_all()
Example #9
def total_rmse():
    group_count = DataReader.nym_count()
    item_count = R.shape[1]
    total_rmse = 0

    item_lam = lam.sum(axis=0)
    highest_n = 500
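    # indices of the highest_n items with the largest summed lam (argpartition does not sort them)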
    large_items = np.argpartition(item_lam, -highest_n)[-highest_n:]

    with msg('Splitting group ratings'):
        group_ratings = []
        for group in range(group_count):
            group_ratings.append(R[P[group]])

    with msg('Getting rmse(s)'):
        count = 0
        for nth_item, item in enumerate(large_items):
            for group in range(group_count):
                mean = Rtilde[group, item]
                # if mean < 3.5 and mean > 2.5:
                # if mean > 4:
                if True:
                    count += 1
                    data = group_ratings[group][:, item].data
                    var = Rvar[group, item]
                    if var == 0: var = 0.01
                    total_rmse += get_rmse(data, mean, var)

            if nth_item % 10 == 0:
                mean_rmse = total_rmse / count
                print(f'[{nth_item}, {count}] Mean RMSE: {mean_rmse}')
Example #10
def plot_nym_stat(thresh=thresh_default, inv=False, savefig=False, outfile=outfile_default, begin=None, num=None, stat_option=stat_option_default):
	stat_name = stat_options[stat_option]
	if inv: stat_name = f'inverse {stat_name}'
	
	fig, ax = plt.subplots()
	ax.set(
		# ylim=(0, None),
		title=f'{stat_name} of each group by item number (thresh no. ratings >= {thresh})',
		xlabel='item number',
		ylabel=stat_name)
	
	cm = plt.get_cmap('gist_rainbow')
	colors = [cm(1.*i/Data.nym_count()) for i in range(Data.nym_count())]

	begin = 0 if begin is None else begin
	end = None if num is None else begin + num
	nym_stats = Data.get_nym_stats()[:, begin:end, :]

	for nym_n in range(Data.nym_count()):
		nym_n_stats = nym_stats[nym_n]
		with msg(f'plotting nym #{nym_n} {stat_name}'):

			valids = (nym_n_stats[:,3] >= thresh)
			print(f'{valids.sum()} of {len(valids)} valid (thresh = {thresh})')

			x = nym_n_stats[:,0][valids]
			if stat_option == 1:
				y = nym_n_stats[:,1][valids]
			elif stat_option == 2:
				y = nym_n_stats[:,2][valids]
			elif stat_option == 3:
				y = np.sqrt(nym_n_stats[:,2][valids])

			if inv: y[y > 0] = 1 / y[y > 0]
			s = np.sqrt(nym_n_stats[:,3][valids])

			ax.scatter(x, y, s=s, facecolors='none', edgecolors=colors[nym_n], label=f'group {nym_n}')
	ax.legend()

	if savefig:
		with msg('Saving "{}" to "{}"'.format(ax.title.get_text(), outfile)):
			ax.get_figure().savefig(outfile, dpi=150)
			plt.clf()
	else:
		plt.show()
Example #11
def sleep_data():
    if request.method == 'POST':
        request_json = request.get_json()
        user_id = request_json['userId']
        start_date = request_json['beginDate']
        end_date = request_json['endDate']
        gaussian_settings = request_json['gaussianSettings']
        r = DataReader()
        start = datetime.strptime(start_date, '%d.%m.%Y')
        end = datetime.strptime(end_date, '%d.%m.%Y')
        if gaussian_settings:
            sleep_data = r.read_sleep_data(user_id, start, end)
            average_list = []
            var_list = []
            for data in sleep_data:
                average_list.append(data['x'])
                var_list.append(data['y'])
            if len(average_list) > 1 and len(var_list) > 1:
                mean_duration = mean(average_list)
                variance_duration = variance(average_list)
                response = make_response(
                    json.dumps([{
                        'user_id': user_id,
                        'avg': mean_duration,
                        'std': math.sqrt(variance_duration)
                    }]))
                response.headers[
                    'Content-Type'] = 'application/json; charset=utf-8'
                return response
            else:
                response = make_response(
                    json.dumps([{
                        'user_id': user_id,
                        'avg': -1000,
                        'std': 1
                    }]))
                response.headers[
                    'Content-Type'] = 'application/json; charset=utf-8'
                return response
        else:
            response = make_response(
                json.dumps(r.read_sleep_data(user_id, start, end)))
            response.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            return response
Example #12
    def test_run_training(self):
        """
        Test to check if the read_text function
        return a list of words given a txt file.
        """
        my_data = DataReader(get_path_basic_corpus())
        my_vocab_size = 500
        my_data.process_data(my_vocab_size)
        my_config = wv.Config(num_steps=200,
                              vocab_size=my_vocab_size,
                              show_step=2)

        my_model = wv.SkipGramModel(my_config)
        duration, loss = wv.run_training(my_model,
                                         my_data,
                                         verbose=False,
                                         visualization=False,
                                         debug=True)
        self.assertTrue(duration <= 1.7)
        self.assertTrue(loss < 7)
Example #13
def heatmap_rating_dist(item):
	# def plot_rating_dists_across_groups(ratings, item, groups, savefig=False):
	with msg("plotting rating distribution"):
		ratings = Data.get_ratings()[:,item]
		nyms = Data.get_nyms()

		data = np.zeros((10, len(nyms)))
		for nym_n, nym in enumerate(nyms):
			unique, counts = np.unique(ratings[nym].data, return_counts=True)
			for rating, count in dict(zip(unique, counts)).items():
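				# ratings come in half-star steps (0.5-5.0); 2*rating - 1 maps them to row indices 0-9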
				data[int(2*rating - 1), nym_n] = count

		ax = sns.heatmap(data)
		ax.set(
			title="Distribution of item #{} ratings by group".format(int(item)),
			xlabel="group number", 
			ylabel="rating", 
			yticklabels=np.linspace(0.5, 5, 10))
		
		plt.show()
Example #14
def main():
    config = Config(CONFIG_FILE)

    # Retrieve data
    data = DataReader(config)

    train_data = data.getVOC07TrainData(shuffle=config.shuffle)
    test_data = data.getVOC07TestData()

    # TODO: Preprocess/format data for training
    X_train = train_data
    y_train = train_data

    X_test = test_data

    # Train model
    model = SsdModel(config.n_classes)
    model.train(X_train, y_train)
    model.test(X_test)
    showSampleData(train_data)
Example #15
    def __init__(self,
                 dataset_name: str,
                 field: str,  # TODO
                 rewards: Dict[str, int],
               ):
        print(f'<SimpleHiEnv>: data set:{dataset_name},field:{field}.')
        # TODO!
        self.field = field  # necessary?

        # init DataReader, and then init sets
        self.reader = DataReader(dataset_name, field)
        self.seeds = self.reader.get_original_seeds()
        self.gt = self.reader.get_gt_set()
        self.current_entity_set = self.seeds.copy()
        self.if_continue = True
        self.candidate_list = []

        # init CGExpan
        device = torch.device("cuda:0")
        self.cgexpan = CGExpan(device, self.reader)  # TODO

        self.rewards = rewards
        print('<SimpleHiEnv>: Env is ready!')
Example #16
def main(start):
    file_count = len(CATEGORY[start:]) * len(YEAR) * len(OUTLET)
    pbar = ProgressBar(max_value=file_count, redirect_stdout=True)
    progress = INITIAL_PROGRESS

    for cat in CATEGORY[start:]:

        # instance of DataReader that will retain all necessary data for categorical manipulation
        by_category = DataReader()

        for time in YEAR:
            by_category.create_reference(cat, time)
            by_category.create_zeroes(cat, time)

            for store in OUTLET:

                # import relevant data for the given category, year, and store
                by_category.store_data(cat, time, store)

                # append individual occ_data to by_category.occ_list
                occurrence(by_category)

                # update by_category.sales_data with sales data
                sales(by_category)

                # update by_category.unitp_data with units data
                units(by_category)

                # append total purchase data (panel) to by_category.panel_list
                panels(by_category)

                # at the very end, update progress bar
                pbar, progress = update_pbar(pbar, progress)

            # append completed sales data to by_category.sales_list
            by_category.sales_list.append(by_category.sales_data)

            # append completed units data to by_category.units_list
            by_category.units_list.append(by_category.units_data)

        # concat all DataFrames and export the final product
        final_product(by_category, cat)

    return SUCCESS_CODE
Example #17
    def rawSearch(self):
        inputpath = self.fileForProcessing
        filerc = io.FileIO(inputpath)
        magic = filerc.read(4)
        filerc.close()
        self.preparedLicumsForGraph = defaultdict(list)
        self.framesCount = len(self.indexArray) - 2
        self.setAngle()
        self.setFocus()
        calc.setMatrixType(self.deviceTypeList.currentIndex())
        calc.getHalfOfMaxAngle()
        if magic == b'\x073"\x11':
            # filerc = io.FileIO(inputpath)
            # frameLen = self.indexArray[1] - self.indexArray[0]
            # rcFrame = filerc.read(frameLen)
            # startMarker = rcFrame.find(b'\xff\xd8')
            # self.getImageSize(rcFrame[startMarker:])
            filerc = io.FileIO(inputpath)
            for ind in range(0, self.framesCount):
                # Open the RC file and read it through an IO container (region start and length)
                frameLen = self.indexArray[ind + 1] - self.indexArray[ind]
                rcFrame = filerc.read(frameLen)
                startMarker = rcFrame.find(b'\xff\xd8')
                radarData = rcFrame[:startMarker]

                licumsList = datareader.getRawData(radarData, ind)
                for target in licumsList.keys():

                    xCoord, yCoord = calc.getLicumCoordsInMetersAlterN(
                        licumsList[target][5], licumsList[target][6],
                        licumsList[target][0], ind)
                    if xCoord is None:
                        continue
                    self.preparedLicumsForGraph[ind] = self.appendLists(
                        self.preparedLicumsForGraph[ind], 3)
                    self.preparedLicumsForGraph[ind][0].append(
                        licumsList[target][0])
                    self.preparedLicumsForGraph[ind][1].append(xCoord)
                    self.preparedLicumsForGraph[ind][2].append(yCoord)

        self.tryFindAngle()
        # self.delay = self.framesCount
        calc.cam_angle = 0
        self.delay = 500
        self.drawGraph()
Example #18
    def __init__(self):
        self.reader = DataReader("yeast.data", 1, 1, " ")

        self.pts, self.mini, self.maxi, self.dimens, self.classes = self.reader.getPoints()

        self.points = []

        for pt, c in zip(self.pts, self.classes):
            point = Point()
            point.position = pt
            point.classe = c
            self.points.append(point)

        self.i = 0
        self.max_iterations = 1

        self.clusters = []

        self.possible_classes = self.get_possible_classes()
Example #19
    def test_read_text(self):
        """
        Test to check if the read_text function
        returns a list of words given a txt file.
        """
        dr1 = DataReader()
        dr2 = DataReader(punctuation=True)
        words1 = dr1.read_text()
        words2 = dr2.read_text()
        print("\nReading time = {}\n".format(get_time(dr1.read_text)))

        self.assertTrue(len(words1) > 0)
        self.assertTrue(len(words2) > 0)
        self.assertEqual(words1[22], "System")
        self.assertEqual(words2[22], "System.")
Example #20
    def __init__(self, path, nvar, iteration=1500, lr=0.01):
        """

        :param path:
        :param nvar: 变量的数目
        :param iteration:
        :param lr:
        """
        data = DataReader.read(path, nvar + 1)  # nvar + y
        self.y = np.array([data[-1]]).transpose()
        self.x = np.array([np.ones((len(self.y),))] \
                          + [np.array(data[i]) for i in range(nvar)]).transpose()
        self.theta = np.zeros((nvar + 1, 1))
        self.iteration = iteration
        self.lr = lr
        self.nvar = nvar

        self.mu = self.x.mean(0)
        self.s = self.x.max(0) - self.x.min(0)
        self.mu[0] = 0
        self.s[0] = 1  # for x_0: (1 - 0) / 1 = 1
        self.feature_normed = False
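The mu and s attributes computed above set up mean/range feature scaling, with the bias column left untouched (mu[0] = 0, s[0] = 1). A minimal sketch of the corresponding normalization step, assuming it belongs to the same class (the method name is hypothetical):

    def normalize_features(self):
        # scale every column of x by its mean and range; the bias column is
        # unaffected because mu[0] = 0 and s[0] = 1
        if not self.feature_normed:
            self.x = (self.x - self.mu) / self.s
            self.feature_normed = True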
Example #21
def read_data(data_path,
              word_word2index,
              char_word2index,
              label_word2index,
              label_type,
              label_bucket,
              max_size=None,
              normalize_digits=True,
              use_lm=False,
              use_elmo=False):
    _buckets = label_bucket[label_type]
    max_length = 0
    data = [[] for _ in _buckets]
    max_char_length = [0 for _ in _buckets]
    print('Reading data from %s' % data_path)
    counter = 0
    reader = DataReader(data_path, word_word2index, char_word2index,
                        label_word2index, use_elmo)
    inst = reader.get_next(normalize_digits)
    while inst is not None and (not max_size or counter < max_size):
        max_length = max(max_length, inst[6])
        counter += 1
        if counter % 10000 == 0:
            print("reading data: %d" % counter)
        inst_size = len(inst[0])
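        # place the instance into the smallest bucket that can hold it (inst_size < bucket_size)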
        for bucket_id, bucket_size in enumerate(_buckets):
            if inst_size < bucket_size:
                if use_elmo:
                    words = inst[0]
                else:
                    words = inst[1]
                if use_lm:
                    data[bucket_id].append(
                        [words, inst[3], inst[5], inst[7], inst[8]])
                else:
                    data[bucket_id].append([words, inst[3], inst[5]])
                max_len = max([len(char_seq) for char_seq in inst[2]])
                if max_char_length[bucket_id] < max_len:
                    max_char_length[bucket_id] = max_len
                break
        inst = reader.get_next(normalize_digits)
    reader.close()
    print("Total number of data: %d" % counter)
    print("Max length: %d" % max_length)
    return data, max_char_length
Example #22
# training hyperparameters
learning_rate = 1e-6  # learning rate
num_epochs = 10  # number of epochs

input_channels = 1  # input channel
input_height = 28  # height
input_width = 28  # width
num_classes = 6  # number of image classes
# image size, number of image classes
one_layer_net = OneLayerNet(input_height * input_width, num_classes)

# paths to the directories with images
train_dir = "data/train"
test_dir = "data/test"

train_generator = DataReader(train_dir, [input_height, input_width], True,
                             input_channels, num_classes).get_generator()
# DataReader provides the training and test image sets
test_generator = DataReader(test_dir, [input_height, input_width], False,
                            input_channels, num_classes).get_generator()

print('Size of training set: {}'.format(
    train_generator.get_data_size()))  # number of training images
print('Size of testing set: {}'.format(
    test_generator.get_data_size()))  # number of test images

print("{} Start training...".format(datetime.now()))  # время начала обучения

# for each epoch, accumulate the recognition error during training
for epoch in range(num_epochs):
    print("{} Epoch number: {}".format(datetime.now(), epoch + 1))
    loss = 0
Example #23
class SimpleHiEnv(BaseHiEnv):

    def __init__(self,
                 dataset_name: str,
                 field: str,  # TODO
                 rewards: Dict[str, int],
               ):
        print(f'<SimpleHiEnv>: data set:{dataset_name},field:{field}.')
        # TODO!
        self.field = field  # necessary?

        # init DataReader, and then init sets
        self.reader = DataReader(dataset_name, field)
        self.seeds = self.reader.get_original_seeds()
        self.gt = self.reader.get_gt_set()
        self.current_entity_set = self.seeds.copy()
        self.if_continue = True
        self.candidate_list = []

        # init CGExpan
        device = torch.device("cuda:0")
        self.cgexpan = CGExpan(device, self.reader)  # TODO

        self.rewards = rewards
        print('<SimpleHiEnv>: Env is ready!')

    def state(self) -> Tuple[List[Entity], List[Entity], str]:
        return self.current_entity_set, self.candidate_list, self.field
        
    def if_stop(self):
        return self.if_continue

    def action_expand(self, keys: List[Entity]) -> int:

        expanded = self.cgexpan.expand(keys)


        expanded = unique_by(expanded, lambda c: c.eid)

        # TODO: shuffle
        # if self.sort_candidates:
        #     self._sort_candidates_by_distance_to_keys(l, keys)
        # else:
        #     random.shuffle(l)

        self.candidate_list = expanded[0:40]  
        
        if len(self.candidate_list) < 3:
            self.if_continue = False

        for candidate in self.candidate_list:
            if (candidate in self.gt) or (candidate in self.seeds):
                candidate.ground_truth = True
            else:
                candidate.ground_truth = False
        return 0

    def action_judge(self, answers: List[bool]) -> List[int]:
        # TODO!
        results = []
        for candidate, answer in zip(self.candidate_list, answers):
            # TODO:
            if answer and (candidate not in self.current_entity_set):
                self.current_entity_set.append(candidate)

            if candidate.ground_truth == answer:
                results.append(self.rewards["correct"])
            else:
                results.append(self.rewards["wrong"])

        print("Now current entity set has:", len(self.current_entity_set))

        return results
Example #24
    def __init__(
        self,
        device,
        reader: DataReader,
        k=5,
        gen_thres=3,
        model_name='bert-base-uncased',
    ):
        self.tokenizer = BertTokenizer.from_pretrained(TOKEN_PATH,
                                                       do_lower_case=False)
        self.maskedLM = BertForMaskedLM.from_pretrained(
            BERT_PATH, output_hidden_states=True)

        self.maskedLM.to(device)
        self.maskedLM.eval()

        self.k = k  # TODO
        self.gen_thres = gen_thres  # TODO

        self.reader = reader
        self.eid2name = reader.get_eid2name()
        self.keywords = reader.get_keywords()
        self.eid2idx = reader.get_eid2idx()  # TODO
        self.entity_pos = reader.get_entity_pos()  # TODO
        self.pretrained_emb = reader.get_pretrained_emb()  # TODO

        self.means = np.array(
            [np.mean(emb, axis=0) for emb in self.get_emb_iter()])

        self.inflect = inflect.engine()

        mask_token = self.tokenizer.mask_token

        self.generation_templates = [
            [mask_token, ' such as {} , {} , and {} .', 1],
            ['such ' + mask_token, ' as {} , {} , and {} .', 1],
            ['{} , {} , {} or other ' + mask_token, ' .', 0],
            ['{} , {} , {} and other ' + mask_token, ' .', 0],
            [mask_token, ' including {} , {} , and {} .', 1],
            [mask_token, ' , especially {} , {} , and {} .', 1],
        ]

        self.ranking_templates = [
            '{} such as ' + mask_token + ' .',
            'such {} as ' + mask_token + ' .',
            mask_token + ' or other {} .',
            mask_token + ' and other {} .',
            '{} including ' + mask_token + ' .',
            '{} especially ' + mask_token + ' .',
        ]

        self.expansion_templates = [
            ('', ' such as {} , {} , {} , and {} .'),
            ('such ', ' as {} , {} , {} , and {} .'),
            ('{} , {} , {} , {} or other ', ' .'),
            ('{} , {} , {} , {} and other ', ' .'),
            ('', ' including {} , {} , {} , and {} .'),
            ('', ' , especially {} , {} , {} , and {} .'),
        ]

        self.calculated_cname_rep = {}

        print('<CGExpan>: CGExpan is ready!')
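The third element of each generation_templates entry appears to indicate which half of the template carries the '{}' slots. A hedged illustration of how a probe sentence might be assembled from three seed entity names (this helper is an assumption, not part of the snippet):

def render_generation_template(template, names):
    # names: three entity surface forms, e.g. ('gold', 'silver', 'copper')
    part_a, part_b, slot_part = template
    if slot_part == 1:
        return part_a + part_b.format(*names)
    return part_a.format(*names) + part_b

# With the first template above and BERT's '[MASK]' token this yields:
# '[MASK] such as gold , silver , and copper .'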
Example #25
class RegressionDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, id):
        sample = self.inputs[id], self.labels[id]
        return sample


if __name__ == '__main__':
    fname = "data/AEP_hourly.csv"
    datareader = DataReader(fname)
    X, Y = datareader.get_data()

    dataset = RegressionDataset(inputs=X, labels=Y)
    dataset_loader = DataLoader(dataset,
                                batch_size=8,
                                shuffle=False,
                                num_workers=2)

    for i, [input, label] in enumerate(dataset_loader):
        print(input)
        print(label)
        print()
        if i == 2: break
Example #26
    print('Client connected')


@socket_.on('disconnect_request', namespace='/biometrics')
def disconnect_request():
    @copy_current_request_context
    def can_disconnect():
        disconnect()

    print('Client disconnected')
    emit('my_response', {'data': 'Disconnected!'}, callback=can_disconnect)


if __name__ == '__main__':

    reader = DataReader()

    def send_data():
        while True:
            red, ir, hr, hr_v, spo2, spo2_v = reader.get_values()
            payload = {
                't': round(time()),
                'red': red,
                'ir': ir,
                'hr': hr,
                'hr_v': hr_v,
                'spo2': spo2,
                'spo2_v': spo2_v
            }
            socket_.emit('data', payload, namespace="/biometrics")
            socket_.sleep(0.1)
Example #27
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from datareader import DataReader
import word2vec as wv
import util

file_path = os.path.join(parentdir, "data")
file_path = os.path.join(file_path, "Wiki.txt")
eval_path = os.path.join(parentdir, "evaluation")
eval_path = os.path.join(eval_path, "questions-words-ptbr.txt")

my_data = DataReader(file_path)
my_data.get_data()
word2index = my_data.word2index
index2word = my_data.index2word

BATCH_SIZE = np.array(range(1, 17)) * 10
number_of_exp = len(BATCH_SIZE)
results = []
info = []

for i, bs in enumerate(BATCH_SIZE):
    print("\n ({0} of {1})".format(i + 1, number_of_exp))
    config = wv.Config(batch_size=bs)
    attrs = vars(config)
    config_info = ["%s: %s" % item for item in attrs.items()]
    info.append(config_info)
Example #28
# Merge all summaries together
merged_summary = tf.summary.merge_all()

valid_summary = tf.Summary()

# Initialize the FileWriters
writer_1 = tf.summary.FileWriter(filewriter_path + 'train')
writer_2 = tf.summary.FileWriter(filewriter_path + 'validation')

# Initialize the saver for model checkpoints
saver = tf.train.Saver()

train_dir = '../data/train'
test_dir = '../data/test'

train_generator = DataReader(train_dir, [input_height, input_width], True,
                             input_channels, num_classes).get_generator()
test_generator = DataReader(test_dir, [input_height, input_width], False,
                            input_channels, num_classes).get_generator()

print('Size of training set: {}'.format(train_generator.get_data_size()))
print('Size of testing set: {}'.format(test_generator.get_data_size()))

train_patterns_per_epoch = train_generator.get_data_size()

# start a session
with tf.Session() as sess:
    # initialize all variables
    sess.run(tf.global_variables_initializer())

    # add the model graph to TensorBoard
    writer_1.add_graph(sess.graph)
Example #29
def test_run():
    '''function to test all the utilities'''
    # Define a date range
    dates = pd.date_range('2015-04-02', '2016-04-01')

    # Choose feature symbols to read
    location = os.path.join(base_dir, "BitcoinData")
    symbols = os.listdir(location)

    #build dataframe consisting of all features
    dfreader = DataReader()
    util = Utility()
    location = os.path.join(base_dir, "BitcoinData")
    df = dfreader.get_data(location, symbols, dates)
    df = util.normalize_data(df)

    for index in range(len(symbols)):
        symbols[index] = symbols[index].strip('.csv')

    plotter = DataPlotting()
    #plot dataframe in selected range and given features list
    plotter.plot_selected(df, symbols, '2015-05-01', '2015-06-01')
    #plot dataframe for all given data
    plotter.plot_data(df, "Bitcoin")

    dates = pd.date_range('2010-01-01', '2016-01-01')
    btc_file = "bitcoin-market-price.csv"
    location = os.path.join(base_dir, btc_file)
    df_btc = dfreader.get_btc(location, btc_file, dates)

    stats = Statistics(df)
    rmean = stats.get_rolling_mean(df_btc['bitcoin-market-price'], window=20)
    rstd = stats.get_rolling_std(df_btc.loc[:, 'bitcoin-market-price'], window=20)
    upper_band, lower_band = stats.get_bollinger_bands(rmean, rstd)

    # Plot raw values, rolling mean and Bollinger Bands
    ax = df_btc['bitcoin-market-price'].plot(title="Bollinger Bands", \
                                            label='bitcoin-market-price')
    rmean.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()

    #compute daily returns
    daily_returns = stats.compute_daily_returns(df_btc)
    plotter.plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

    daily_returns.replace(to_replace=np.inf, value=np.NaN, inplace=True)
    # Plot a histogram
    daily_returns.hist(bins=21)

    # Get mean and standard deviation
    mean = daily_returns.mean()
    std = daily_returns.std()

    #print type(mean)
    plt.axvline(mean[0], color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std[0], color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std[0], color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Scatterplots
    df.plot(kind='scatter', x='hash_rate', y='market_cap')
    beta_XOM, alpha_XOM = np.polyfit(df['hash_rate'], df['market_cap'], 1)  # fit poly degree 1
    plt.plot(df['hash_rate'], beta_XOM*df['hash_rate'] + alpha_XOM, '-', color='r')
    plt.show()

    # Calculate correlation coefficient
    correlation = df['avg_block_size'].corr(df['n_tx'], method='pearson')
    print(correlation)
Example #30
 def setUpClass(cls):
     cls.dr = DataReader()
     cls.words = cls.dr.read_text()
Example #31
import numpy as np
import matplotlib.pyplot as plt

from myutils import msg
from datareader import DataReader
from dist_model import DiscreteNormal as DiscNorm

rating_count = 5
dist_gen = DiscNorm(np.linspace(0.5, 5.5, num=rating_count + 1))

with msg("Getting data"):
    Rtilde = DataReader.get_Rtilde()
    Rvar = DataReader.get_Rvar()
    R = DataReader.get_ratings()
    lam = DataReader.get_lam()
    P = DataReader.get_nyms()


def get_data_dist(data):
    ratings, counts = np.unique(data, return_counts=True)
    dist_data = np.zeros(rating_count)
    dist_data[ratings.astype(int) - 1] = counts / counts.sum()
    return dist_data


def get_err(data, mean, var):
    dist_data = get_data_dist(data)
    dist_model = dist_gen.pmf(mean, var)
    return abs(dist_data / dist_model)
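As a quick illustration (not part of the original module), get_data_dist turns a raw sample of 1-5 star ratings into an empirical probability vector over the five rating values:

sample = np.array([5, 4, 5, 4, 1])
print(get_data_dist(sample))  # -> [0.2 0.  0.  0.4 0.4]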

Example #32
from datareader import DataReader
from datawriter import DataWriter
from femgrid import FemGrid
from globaldata import GlobalData
from result import Result
from plotter import Plotter

rMin, alfaAir, tempBegin, tempAir, tauMax, nTime = DataReader.readGlobalData("global_data")
ne, nh, rMax, elements, nodes = DataReader.readElementsData("elements_data", rMin, tempBegin)

globalData = GlobalData(ne, nh, rMin, rMax, alfaAir, tempBegin, tempAir, tauMax, nTime)
globalData.printGlobalData()

femGrid = FemGrid(elements, nodes)
result = femGrid.simulateProcess(globalData)

result.printTemperatures()

DataWriter.writeData("result.txt", result.getTemperatures())
Plotter.plot("czas", "temperatura", femGrid.getTauArray(), result.getTemperatures())


Example #33
class KMeans:

    def __init__(self):
        self.reader = DataReader("yeast.data", 1, 1, " ")

        self.pts, self.mini, self.maxi, self.dimens, self.classes = self.reader.getPoints()

        self.points = []

        for pt, c in zip(self.pts, self.classes):
            point = Point()
            point.position = pt
            point.classe = c
            self.points.append(point)

        self.i = 0
        self.max_iterations = 1

        self.clusters = []

        self.possible_classes = self.get_possible_classes()


    def randomPoint(self):
        point = Point()
        for mi, ma in zip(self.mini, self.maxi):
            point.position.append(uniform(mi, ma))
        return point

    def randomClusters(self, max_clusters):
        for i in range(0, max_clusters):
            cluster = Cluster()
            cluster.centroid = self.randomPoint()
            self.clusters.append(cluster)

    def distanceBetween(self, p1, p2):

        distance = 0
        for d1, d2 in zip(p1, p2):
            distance += (d2-d1) ** 2
        return distance

    def realDistanceBetween(self, p1, p2):
        distance = 0
        for d1, d2 in zip(p1, p2):
            distance += (d2-d1) ** 2
        return sqrt(distance)

    def putPointInClosestCluster(self, point):
        min_distance = self.distanceBetween(point.position, self.clusters[0].centroid.position)
        cur_cluster = self.clusters[0]
        for cluster in self.clusters:
            distance = self.distanceBetween(point.position, cluster.centroid.position)
            if distance < min_distance:
                cur_cluster = cluster
                min_distance = distance
        cur_cluster.points.append(point)

    def assignPointsToClusters(self):
        for point in self.points:
            self.putPointInClosestCluster(point)

    def recalculateCentroids(self):
        for cluster in self.clusters:
            cluster.updateCentroid()

    def clearClusters(self):
        for cluster in self.clusters:
            cluster.points = []

    def clusterToX(self, cluster):
        xarray = []
        for p in cluster.points:
            xarray.append(p.position[0])
        return xarray

    def clusterToY(self, cluster):
        yarray = []
        for p in cluster.points:
            yarray.append(p.position[1])
        return yarray

    def clusterToZ(self, cluster):
        zarray = []
        for p in cluster.points:
            zarray.append(p.position[2])
        return zarray

    def printClusters(self):
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        colors = ['b', 'r', 'g', 'y', 'c']
        for i, cluster in enumerate(self.clusters):
            if i >= len(colors):
                break
            ax.scatter(self.clusterToX(cluster), self.clusterToY(cluster), self.clusterToZ(cluster), zdir=u'z', s=20, c=colors[i], marker='o')
            ax.scatter([cluster.centroid.position[0]], [cluster.centroid.position[1]], [cluster.centroid.position[2]], zdir=u'z', s=50, c=colors[i], marker='s')

        plt.show()

    def get_possible_classes(self):
        classes = []
        for p in self.points:
            if classes.count(p.classe) == 0:
                # print(p.classe)
                classes.append(p.classe)
        return classes

    def rand_index(self):
        cooccurrence_matrix = []

        for possible_class in self.possible_classes:
            matrix_line = []
            for cluster in self.clusters:
                array_of_classes = cluster.to_array_of_classes()
                matrix_line.append(array_of_classes.count(possible_class))
            cooccurrence_matrix.append(matrix_line)

        import numpy as np
        array_matrix = np.array(cooccurrence_matrix)

        tp, fp, tn, fn = get_tp_fp_tn_fn(array_matrix)

        rand_index = float(tp + tn) / (tp + fp + fn + tn)

        precision = float(tp) / (tp + fp)
        recall = float(tp) / (tp + fn)

        return rand_index


    # execution
    def execute(self, args):

        # First Argument is the Number of Clusters
        # All the others are coordinates
        # Example (3, x1,y1,z1, x2,y2,z2, x3,y3,z3)

        self.clusters = []

        num_clusters = int(args[0])

        if num_clusters < 1: num_clusters = 1
        if num_clusters > 5: num_clusters = 5

        for i in range(0, num_clusters):
            cluster = Cluster()
            cluster.centroid.position = args[(i * self.dimens) + 1: (i * self.dimens) + self.dimens + 1]
            self.clusters.append(cluster)
        i = 0

        while i < self.max_iterations:
            self.clearClusters()
            self.assignPointsToClusters()
            # self.recalculateCentroids()
            i += 1


        rand_index = self.rand_index()

        return rand_index
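rand_index depends on a get_tp_fp_tn_fn helper that is not included in this snippet. A minimal sketch using the standard pair-counting definitions (an assumption about the helper's contract: it takes the class-by-cluster co-occurrence matrix and returns tp, fp, tn, fn):

import numpy as np
from scipy.special import comb

def get_tp_fp_tn_fn(cooccurrence_matrix):
    # cell (i, j) holds the number of points of class i assigned to cluster j
    tp = comb(cooccurrence_matrix, 2).sum()                        # same class, same cluster
    same_cluster = comb(cooccurrence_matrix.sum(axis=0), 2).sum()  # pairs sharing a cluster
    same_class = comb(cooccurrence_matrix.sum(axis=1), 2).sum()    # pairs sharing a class
    fp = same_cluster - tp                                         # same cluster, different class
    fn = same_class - tp                                           # same class, different cluster
    total_pairs = comb(cooccurrence_matrix.sum(), 2)
    tn = total_pairs - tp - fp - fn
    return tp, fp, tn, fn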