Example #1
def attempt_and_hint_process(data):
    print('==> remove records whose attempt_count is more than 15')
    data = data[data['attempt_count'] <= 15]
    data = data.reset_index(drop=True)

    problem_list = np.unique(data['problem_id'])
    attempt_dict = {}
    hint_dict = {}
    attempt_list = []
    hint_list = []
    for idx in pp.prog_percent(
            range(len(problem_list)),
            stream=sys.stdout,
            title='==> get attempt and hint max value at problem level'):
        temp_data = data[data['problem_id'] == problem_list[idx]]
        attempt_dict[problem_list[idx]] = max(temp_data['attempt_count'])
        attempt_list.append(max(temp_data['attempt_count']))
        hint_dict[problem_list[idx]] = max(temp_data['hint_count'])
        hint_list.append(max(temp_data['hint_count']))

    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=False)
    ax = axs[0]
    ax.hist(attempt_list, bins=np.arange(0, 16, 1))
    ax.set_title('max attempt distribution')
    ax.set_xlabel("attempt(max)")
    ax.set_ylabel("number")

    ax = axs[1]
    ax.hist(hint_list)
    ax.set_title("max hint distribution")
    ax.set_xlabel("hint(max)")
    ax.set_ylabel("number")

    plt.savefig('./result/assistment2009/attempt_hint_number_' +
                datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.png')

    for idx in pp.prog_percent(
            range(len(data)),
            stream=sys.stdout,
            title='==> cast attempt count and hint count to value/max'):
        if attempt_dict[data.loc[idx, 'problem_id']] == 0:
            data.loc[idx, 'attempt_count_level'] = -1
        else:
            data.loc[
                idx,
                'attempt_count_level'] = data.loc[idx, 'attempt_count'] / (
                    attempt_dict[data.loc[idx, 'problem_id']] * 1.0)

        if hint_dict[data.loc[idx, 'problem_id']] == 0:
            data.loc[idx, 'hint_count_level'] = -1
        else:
            data.loc[idx, 'hint_count_level'] = data.loc[idx, 'hint_count'] / (
                hint_dict[data.loc[idx, 'problem_id']] * 1.0)

    return data
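
The per-problem value/max normalization above can also be expressed with a pandas groupby. A minimal sketch (toy data; plotting and progress bars omitted; the -1 convention for problems whose max is 0 is kept):

import pandas as pd

df = pd.DataFrame({'problem_id':    [1, 1, 2, 2],
                   'attempt_count': [1, 4, 0, 3],
                   'hint_count':    [0, 0, 1, 2]})
for col in ['attempt_count', 'hint_count']:
    per_problem_max = df.groupby('problem_id')[col].transform('max')
    # value / per-problem max, or -1 where that max is 0
    df[col + '_level'] = (df[col] / per_problem_max).where(per_problem_max != 0, -1)
print(df)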
Example #2
def normalization_continues_data(data):
    print('==> normalize continuous data')
    columns_name_list = ["attempt_count", "time", "hint_count"]
    data = data.reset_index(drop=True)

    size = len(data)
    for column_name in columns_name_list:
        if column_name == "time":
            bins = [-1, 60, 300, 1200, 3600, 60000000]
            data[column_name] = pd.cut(data[column_name], bins, labels=False)
            tmpList = []

            for i in pyprind.prog_percent(range(size),
                                          stream=sys.stdout,
                                          title=column_name):
                try:
                    tmp = int(data.loc[i, column_name])
                except:
                    tmp = 0
                    # raise ValueError(str(data.loc[i, column_name])+"_"+str(i))
                tmpList.append(math.log((tmp + 2), 6))
            data['time_normal'] = tmpList
        elif column_name == "attempt_count":
            bins = [-10, 1, 20, 100, 40000]
            data[column_name] = pd.cut(data[column_name], bins, labels=False)
            data[column_name] += 1
            tmpList = []

            for i in pyprind.prog_percent(range(size),
                                          stream=sys.stdout,
                                          title=column_name):
                # print ("attempt_count\t",str(i))
                tmp = int(data.loc[i, column_name])
                tmpList.append(math.log((tmp + 1), 5))
            data['attempt_count_normal'] = tmpList
        elif column_name == "hint_count":
            bins = [-1, 0, 2, 4, 3000]
            data[column_name] = pd.cut(data[column_name], bins, labels=False)
            data[column_name] += 1
            tmpList = []
            for i in pyprind.prog_percent(range(size),
                                          stream=sys.stdout,
                                          title=column_name):
                try:
                    tmp = int(data.loc[i, column_name])
                except:
                    tmp = 0
                tmpList.append(math.log((tmp + 1), 5))
            data['hint_count_normal'] = tmpList
        else:
            raise ValueError("check your continus_columns parameter!")
    return data
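
A minimal sketch of the bin-then-log transform applied to the time column above (only pandas and math assumed; toy values):

import math
import pandas as pd

times = pd.Series([30, 200, 900, 5000])   # seconds, toy values
codes = pd.cut(times, bins=[-1, 60, 300, 1200, 3600, 60000000], labels=False)
time_normal = [math.log(c + 2, 6) for c in codes]   # same transform as 'time_normal'
print(list(codes), time_normal)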
Example #3
def add_cross_feature_to_dataset(dataset, dp):
    if len(dp.dataset_columns_for_cross_feature) == 0:
        print("==> no need to add cross feature to dataset")
        return dataset
    else:
        print("==> add cross feature to dataset")
        columns_max, columns_numb, _ = get_columns_info(dataset)
        d_size = len(dataset)
        for item in dp.dataset_columns_for_cross_feature:
            print("==> add", aux.connectStringfromList(item))
            temp = []
            for i in pyprind.prog_percent(range(d_size),
                                          stream=sys.stdout,
                                          title=item):
                if len(item) == 2:
                    value = dataset.loc[i, item[0]] + dataset.loc[
                        i, item[1]] * (columns_max[item[0]] + 1)
                elif len(item) == 3:
                    value = dataset.loc[i, item[0]] + dataset.loc[i, item[1]] * (columns_max[item[0]] + 1) + \
                            dataset.loc[i, item[2]] * (columns_max[item[0]] + 1) * (columns_max[item[1]] + 1)
                else:
                    raise ValueError('cross features only support 3 at most')
                temp.append(value)
            dataset[aux.connectStringfromList(item)] = temp
        return dataset
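
The cross-feature value is a mixed-radix pairing, so every combination of the input columns maps to a unique code. A small standalone illustration (names used only for this sketch):

def cross2(a, b, max_a):
    # same formula as the two-column branch above
    return a + b * (max_a + 1)

# with columns_max[item[0]] == 4, the pair (3, 2) encodes to 3 + 2 * 5 == 13
assert cross2(3, 2, 4) == 13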
Example #4
def connectUser(data, connected_file_name):
    print("==> load data successful")
    u, c = counter(data['user_id'])
    # UserNumberDict = dict(zip(u, c))

    userQuesNumIndexList = getUserQuesNumIndexList(data['user_id'])
    newdata = pd.DataFrame()

    print('==> begin concatenate dataset')
    for i in pp.prog_percent(range(len(u)), stream=sys.stdout):
        for k in range(len(userQuesNumIndexList)):
            if userQuesNumIndexList[k, 0] == u[i]:
                temp = data.iloc[int(userQuesNumIndexList[
                    k, 2]):int(userQuesNumIndexList[k, 2] +
                               userQuesNumIndexList[k, 1])]
                newdata = pd.concat([newdata, temp])

    newdata = newdata.reset_index(drop=True)
    newdata.to_csv(connected_file_name, index=False)

    print(
        '==> before connect\t',
        aux.stastic_SecNumber_UserNumber_SkillNumber(data,
                                                     code0.DatasetParameter()))
    print(
        '==> after connect\t',
        aux.stastic_SecNumber_UserNumber_SkillNumber(newdata,
                                                     code0.DatasetParameter()))

    return newdata
Example #5
def orthogonal_averaged(pix_map, traces, ts, shape, contribution_f=None, fill_value = 0):
    if not contribution_f:
        contribution_f = contribution_function

    frame_involved, frame_dists, pixel_list = pix_map

    # improves ETA calculation
    random.shuffle(pixel_list)

    contributions = np.zeros(frame_dists.shape, dtype=object)
    for p in pixel_list:
        contributions[p] = [contribution_f(d) for d in frame_dists[p]]
        contributions[p] /= np.sum(contributions[p])

    # frame = np.zeros(shape)
    #dim = len(shape)
    #sel = tuple(np.array(pixel_list).T)

    video = np.full((len(ts),) + shape, fill_value, dtype=np.float32)
    sl = (slice(len(video)),)
    for p in prog_percent(pixel_list):
        # frame[p] = np.sum(traces[frame_involved[p], ts]*contributions[p])
        traces_in_p = traces[frame_involved[p]][:,ts]
        traces_weighed = (traces_in_p.T*contributions[p]).T
        video[sl + p] = np.sum(traces_weighed, axis=0)

        # does not work
#        frame[sel] = np.sum(traces[frame_involved[sel], t]*contributions[sel], axis=dim)
    return video
Example #6
	def BuildIndex(self):
		def bigram(title):
			bigram = (title.split(',')[0], title.split(',')[1].replace('.', ''))
			title = re.sub(r'\(.*\)', '', title.split(',')[0]).split()[0].strip()
			bigram += (title, )
			if len(title) > 2:
				prefix = title[0]
				for i in range(1, len(title)):
					if title[i:].count(title[i]) == 1:
						bigram += (prefix + title[i],)
			return bigram

		tmp = dict()
		for i in pyprind.prog_percent(Course.objects.all()):
			key = bigram(i.title)
			titleTerms = self.title2terms(i.title)
			CourseCode = i.code

			for k in key:
				tmp.setdefault(k, set()).add(CourseCode)
			for t in titleTerms:
				tmp.setdefault(t, set()).add(CourseCode)
			tmp.setdefault(i.professor, set()).add(CourseCode)
			tmp.setdefault(CourseCode, set()).add(CourseCode)

		result = tuple( {'key':key, self.school:list(value)} for key, value in tmp.items() if key != '' and key!=None)

		self.SrchCollect.remove({})
		
		self.SrchCollect.insert(result)
		self.SrchCollect.create_index([("key", pymongo.HASHED)])
Example #7
    def register_whole_stack(self,
                             save_path="",
                             max_cell_radius=5,
                             cell_size_iterations=5,
                             cell_rel_threshold=0.4,
                             min_cell_intensity=20,
                             cell_overlap=0):

        #disp = []
        #self.aligned_frame = [None] * self.nt
        self.displacement = np.empty([self.nt, 3])
        self.registered_stack = np.empty((self.nt, self.nz, self.nx, self.ny),
                                         dtype=np.uint16)
        self._prepare_std_deviation_and_invalid_frames_and_result()
        self._prepare_invalid_frames()

        register_queue = self._create_queue(self._align_frame_worker,
                                            self.thread_count * 2)

        for t in prog_percent(range(self.nt)):
            register_queue.put(
                [self.read_frame(t), t, self.registered_stack[t], t])
        register_queue.join()

        return self.displacement, self.registered_stack
Example #8
def plane_wise(arguments, z_range=None, print_output=False):
    image = io.load(arguments.input_file)
    ref = io.load(arguments.reference)
    r1 = util.randomword(20)
    r2 = util.randomword(20)
    tmpf = tempfile.gettempdir()

    def plane_name(r, z):
        return os.path.join(tmpf, r + '_z%d.nrrd' % z)

    if not z_range:
        z_range = np.arange(0, image.shape[0])

    warped = []
    results = []
    for z in prog_percent(z_range):
        inp_plane = image[z]
        ref_plane = ref[z]
        inp_fn = plane_name(r1, z)
        ref_fn = plane_name(r2, z)
        io.save(inp_fn, inp_plane)
        io.save(ref_fn, ref_plane)
        arguments_z = copy.deepcopy(arguments)
        arguments_z.input_file = inp_fn
        arguments_z.reference = ref_fn
        arguments_z.dimensions = 2
        result = run_antsreg(arguments_z)
        results.append(result)
        warped.append(result.load_warped())

    return img.cmp_images(warped), results
Example #9
def run_gridsearch(*args, **kwargs):
    f, configs = grid_search(*args, **kwargs)
    results = []
    for i, c in prog_percent(list(enumerate(configs))):
        result = f(c)
        results.append(result)
    return results
Example #10
def planewise_affine(fixed, moving, return_transforms=False):
    zshift = get_zshift(fixed, moving)

    fixed = to_numpy(fixed)
    moving = to_numpy(moving)

    size_z = fixed.shape[0]

    warped = np.zeros_like(fixed)
    transforms = [None]*size_z
    for z in prog_percent(list(range(max((0, -zshift)), min((size_z, -zshift + size_z))))):
        mov = ants.from_numpy(moving[z + zshift].swapaxes(0, 1))
        fix = ants.from_numpy(fixed[z].swapaxes(0, 1))
        res = ants.registration(mov, fix,
                                type_of_transform='Affine',
                                reg_iterations=[500, 500, 500],
                                grad_step=.1,
                                verbose=True)
        t = ants.read_transform(res['fwdtransforms'][0])
        transforms[z] = t
        trans = ants.apply_ants_transform_to_image(t, mov, fix)
        warped[z] = trans.numpy().swapaxes(0, 1)

    if return_transforms:
        return warped, (transforms, zshift)

    return warped
Example #11
    def run(self):
        if not self.prepared:
            self.prepare()

        frame_stack = np.zeros(self.lif_shape, np.uint16)
        ts = list(range(self.nf))

        for f in prog_percent(ts):
            for z in range(self.lif_shape[1]):
                plane = self.ir.read(z=z,
                                     t=f,
                                     c=0,
                                     series=self.lif_idx,
                                     rescale=False)
                frame_stack[f,
                            z] = toCPU(self.alignPlaneGPU(toGPU(plane), z, f))
        """
        self.frameCount -= len(self.invalidFrames)
        self.stdDeviation = toCPU(th.sqrt((self.sumSqTensor[z] - self.sumTensor[z]**2/self.frameCount)/(self.frameCount-1)))
        """

        #stdDevDiffTensor = toGPU(sumSqStdDev[z]) + sumSqTensor[z] - (toGPU(sumStdDev[z]) + sumTensor[z])**2/frameCount
        #stdDeviation = toCPU(th.sqrt(stdDevDiffTensor**2/(frameCount-1)))

        return dict(aligned=frame_stack, shifts=self.shifts)
Example #12
def time_basic_process(data):
    # -1-transfer to second unit
    print("==> transfer time unit: millsecond to second")
    tempTimeList = list(data['time'])
    newTimeList = [int(x / 1000) for x in tempTimeList]
    data['time'] = newTimeList
    del newTimeList, tempTimeList

    # -2-remove outlier records
    print('==> delete outlier of time feature')
    print('==> length before delete\t', len(data))
    data = data[(data['time'] <= code0.DatasetParameter().time_threshold)
                & (data['time'] > 0)]
    print('==> length after delete\t', len(data))

    # -3-transfer to z-score
    time_z_level = code0.DatasetParameter().time_z_level
    print('==> preprocess time to z-score based on ', time_z_level)
    time_z_id_set = np.unique(data[time_z_level])
    std_dict = {}
    mean_dict = {}
    for itme_id in pp.prog_percent(time_z_id_set,
                                   stream=sys.stdout,
                                   title='==> extract mean and std of time'):
        temp_data = data[data[time_z_level] == itme_id]
        temp_list = list(temp_data['time'])
        # print ('-- problem_id ',problem_id,' -- ',len(temp_list),' --')
        std_dict[itme_id] = np.std(temp_list, axis=0)
        mean_dict[itme_id] = np.mean(temp_list, axis=0)

    assert len(std_dict) == len(mean_dict)

    data = data.reset_index(drop=True)
    for id in pp.prog_percent(range(len(data)),
                              stream=sys.stdout,
                              title='==> cast time to z-score'):
        data.loc[id, 'time'] = (data.loc[id, 'time'] -
                                mean_dict[data.loc[id, time_z_level]]) / (
                                    std_dict[data.loc[id, time_z_level]] * 1.0)

    data = data.fillna(0)
    """
    plt.hist(list(data['time']), bins=np.arange(min(data['time']), max(data['time']), code0.DatasetParameter().time_interval*2))
    plt.title("time z score distribution")
    plt.savefig('./result/assistment2009/time_distribution' + str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) + '.png')
    """
    return data
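
The per-group z-score loop above can be sketched with a groupby transform. A minimal version (toy data; 'problem_id' stands in for the time_z_level column; ddof=0 matches the np.std used above):

import pandas as pd

df = pd.DataFrame({'problem_id': [1, 1, 1, 2, 2, 2],
                   'time':       [10, 20, 30, 5, 5, 8]})
grouped = df.groupby('problem_id')['time']
df['time_z'] = ((df['time'] - grouped.transform('mean'))
                / grouped.transform(lambda s: s.std(ddof=0)))
print(df)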
Example #13
def orthogonal(rois, traces, color_func, ts, shape):
    activity = np.zeros((len(ts),) + tuple(shape) + (3,), dtype=np.uint8)

    for i, t in prog_percent(list(enumerate((ts)))):
        for roi_id, (roi, trace) in enumerate(zip(rois, traces)):
            x, y, z, _ = roi
            activity[i, y, x] = color_func(trace[t])

    return activity
Example #14
def slice_series(fn, z, ts):
    for new_t, t in prog_percent(list(enumerate(ts))):
        frame = io.get_frame(fn, t)
        if new_t == 0:
            new_shape = list(frame.shape)[1:]
            new_shape = [len(ts)] + new_shape
            new = np.zeros(new_shape)
        new[new_t, :, :] = frame[z]

    return new
Example #15
def cut_series(fn, ts):
    for new_t, t in prog_percent(list(enumerate(ts))):
        frame = io.get_frame(fn, t)
        if new_t == 0:
            new_shape = list(frame.shape)
            new_shape.insert(1, len(ts))
            new = np.zeros(new_shape)
        new[:, new_t, :, :] = frame

    return new
Example #16
def prediction(model_0, data_test, label_test):

    well_sorted = 0
    model_0 = load_model('%s/clusters_saves/iteration_0/save_cluster_0/save' %
                         root_path)  # model 0

    list_dir_iteration = sorted(os.listdir(cluster_save_dir),
                                key=lambda k: int(k.split("_")[-1]))
    number_images = len(data_test)
    for row in pyprind.prog_percent(range(0, number_images)):  #for each image

        Y_prob = model_0.predict(data_test[row][np.newaxis, :])
        Y_classes = Y_prob.argmax(
            axis=1)  # predicts the image label based on the best probability
        label_name = all_label_names[all_label_numbers.index(Y_classes)]

        for iteration in list_dir_iteration:  #for each iteration directory

            species_found = False

            list_clusters = sorted(os.listdir(root_path + '/clusters_saves' +
                                              '/' + iteration),
                                   key=lambda k: int(k.split("_")[-1]))

            for cluster in list_clusters:  #for each cluster in the iteration directory

                if not species_found:
                    cluster_species = np.genfromtxt(
                        root_path + '/clusters_saves' + '/' + iteration + '/' +
                        cluster + '/labels.csv',
                        dtype=None,
                        encoding=None)[1:]
                    if label_name in cluster_species:
                        species_found = True
                        model = load_model(root_path + '/clusters_saves' +
                                           '/' + iteration + '/' + cluster +
                                           '/save')
                        Y_prob = model.predict(data_test[row][np.newaxis, :])
                        Y_classes = Y_prob.argmax(
                            axis=1
                        )  # predicts the image label based on the best probability
                        ID = int(cluster.split('_')[-1])
                        label_name = all_index_with_ID[
                            all_index_with_ID.index(ID) +
                            1][Y_classes][1]  #image's label name

        true_label = np.where(
            label_test[row][np.newaxis, :] == 1)[1][0]  # theoretical label
        true_label_name = all_label_names[all_label_numbers.index(true_label)]

        if true_label_name == label_name: well_sorted += 1
        if not row == 0: print(well_sorted / row)  # progress

    acc = well_sorted / number_images
    return acc
Example #17
    def loadfiles(self):
        stack = []

        for fn in prog_percent(self.files):
            im = cv2.imread(fn, 0).astype(np.float32)
            if im.shape == (488, 648):
                im = im[4:-4, 4:-4]

            stack.append(im)

        return np.array(stack, dtype=np.float32)
Example #18
def score_results(results, metric='MI[$REF_IN,1,32,Regular,0.25]'):
    def func(res):
        warped = res.get_warped()
        ref = res.arguments.reference
        sim = measure_similarity(ref, warped, metric)
        return sim, res

    print('Scoring')
    scored = list(map(func, prog_percent(results)))
    scored.sort(key=lambda e: e[0])

    return scored
Example #19
def main(argv):
    folder = '/Users/koesterlab/registered/control/'
    files = glob(folder + '*_aligned.h5')

    #files = fileinput.input()
    for f in prog_percent(files):
        try:
            r = re.compile(r'^' + folder + '(?P<fn>.*)_aligned.h5')
            m = r.match(f)
            fn = m.group('fn')
            process_file(folder + fn)
        except Exception as e:
            print(e)
Example #20
def lif_read_stack(fn):
    ir = lif_open(fn)
    img_i = lif_find_timeseries(fn)
    shape = get_shape(fn, img_i)

    stack = np.empty(shape, dtype=np.uint16)

    # Load the whole stack...
    for t in prog_percent(range(stack.shape[0])):
        for z in range(stack.shape[1]):
            stack[t, z] = ir.read(t=t, z=z, c=0, series=img_i, rescale=False)

    return stack
Example #21
def time_basic_process(data):
    # -1-transfer time to 'integer' from 'str'
    # -2-remove outlier records
    old_time_list = list(data['time'])
    new_time_list = []
    for i in old_time_list:
        kp = int(float(i))
        if kp > 150: kp = 150
        new_time_list.append(kp)
    data['time'] = new_time_list

    # -3-transfer to z-score
    time_z_level = 'skill_id'
    print('==> preprocess time to z-score based on ', time_z_level)
    time_z_id_set = np.unique(data[time_z_level])
    std_dict = {}
    mean_dict = {}
    for itme_id in pp.prog_percent(time_z_id_set,
                                   stream=sys.stdout,
                                   title='==> extract mean and std of time'):
        temp_data = data[data[time_z_level] == itme_id]
        temp_list = list(temp_data['time'])
        # print ('-- problem_id ',problem_id,' -- ',len(temp_list),' --')
        std_dict[itme_id] = np.std(temp_list, axis=0)
        mean_dict[itme_id] = np.mean(temp_list, axis=0)

    assert len(std_dict) == len(mean_dict)

    data = data.reset_index(drop=True)

    for id in pp.prog_percent(range(len(data)),
                              stream=sys.stdout,
                              title='==> cast time to z-score'):
        data.loc[id, 'time'] = (data.loc[id, 'time'] -
                                mean_dict[data.loc[id, time_z_level]]) / (
                                    std_dict[data.loc[id, time_z_level]] * 1.0)

    return data
Example #22
def attemp_hint_and_correctness_analysis(data):
    data = data.reset_index(drop=True)
    bins = np.concatenate([[-1], np.arange(0.0, 1.1, 0.1)])

    for attri in ['hint_count_level', 'attempt_count_level']:
        correct_mean_list = []
        correct_std_list = []
        correct_num_list = []

        for item_index in pp.prog_percent(
                range(len(bins)),
                stream=sys.stdout,
                title='==> get correctness according to ' + attri):
            up_bin = bins[item_index] + 0.05
            down_bin = bins[item_index] - 0.05

            temp_data = data[(data[attri] >= down_bin)
                             & (data[attri] < up_bin)]
            temp_correct_list = list(temp_data['correct'])
            correct_num_list.append(len(temp_correct_list))

            if (len(temp_correct_list) != 0):
                correct_mean_list.append(np.mean(temp_correct_list, axis=0))
                correct_std_list.append(np.std(temp_correct_list, axis=0))
            else:
                correct_mean_list.append(0)
                correct_std_list.append(0)

        fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
        ax = axs[0]
        ax.plot(bins, correct_mean_list)
        ax.set_title('correctness ' + attri)

        boundary_list = code0.DatasetParameter().correct_boundary_list
        for nmber in boundary_list:
            ax.axhline(y=nmber,
                       xmin=0,
                       xmax=1,
                       c="red",
                       linewidth=0.5,
                       zorder=0)

        ax = axs[1]
        ax.plot(bins, correct_num_list)
        ax.set_title(attri + " number distribution")
        ax.set_xlim([-1.1, 1.1])
        plt.savefig('./result/assistment2009/' + attri + '_correctness_' +
                    str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) +
                    '.png')
Example #23
    def run(self, number_of_runs=1, max_generations=None):
        # Some Stats
        number_lost = 0
        num_of_generations = []

        if max_generations is not None:
            self.max_generations = max_generations

        graph.ylabel('Proportion of p')
        graph.xlabel('Number of Generations')

        # Selection With Drift
        for i in pyprind.prog_percent(range(number_of_runs)):
            results, lost = self.single_run()
            num_of_generations.append(len(results))
            number_lost += lost
            graph.plot(np.array(results) / self.allele_pool_size, linewidth=1.0, alpha=0.4)

        num_of_generations = np.array(num_of_generations)

        graph.plot(
            [0, num_of_generations.max()],
            [1, 1],
            linewidth=1.0,
            linestyle='--',
            label='Fixation Point',
            color='k'
        )

        # Selection Alone (Benchmark)
        graph.plot(
            np.array(self.selection_only_benchmark(break_point=num_of_generations.max())) / self.allele_pool_size,
            linewidth=1.5,
            color='k',
            label='Selection Only (Non-Discrete)'
        )

        graph.title(
            '{} Simulation ({}% of runs lost allele p)'.format(
                self.type_of_trait,
                round(number_lost * 100 / number_of_runs, 2)
            )
        )
        graph.legend(loc=4)
        graph.xlim(0, num_of_generations.max())
        graph.ylim(0, 1.05)
        graph.show()

        return num_of_generations, number_lost
Example #24
def run_ae_epoch(sess, model, data, TrainConfig):
    batch_number = int(len(data) / (TrainConfig.batch_size * TrainConfig.num_steps))
    learning_rate  = TrainConfig.learning_rate
    for i in pyprind.prog_percent(range(batch_number), stream=sys.stdout):
        x = np.zeros((TrainConfig.batch_size, TrainConfig.num_steps, TrainConfig.seq_width))
        kindex = i * (TrainConfig.batch_size * TrainConfig.num_steps)
        for ip in range(TrainConfig.batch_size):
            for j in range(TrainConfig.num_steps):
                x[ip, j, :] = data.iloc[kindex]
                kindex += 1
        #mask_np = np.random.binomial(1, 1 - TrainConfig.corruption_level, [TrainConfig.batch_size * TrainConfig.num_steps,TrainConfig.seq_width])
        learning_rate = learning_rate*TrainConfig.lr_decay
        if learning_rate<=TrainConfig.min_lr:
            learning_rate = TrainConfig.min_lr
        _ = sess.run(model.optimizer, feed_dict={model.inputs: x})
    avgcost = sess.run(model.avgcost, feed_dict={model.inputs: x})
    return avgcost
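
The nested batch/step loop above copies batch_size * num_steps consecutive rows into the input tensor, which is just a reshape of a contiguous slice. A minimal sketch with toy sizes:

import numpy as np
import pandas as pd

data = pd.DataFrame(np.arange(24).reshape(12, 2))   # 12 rows, seq_width = 2
batch_size, num_steps, seq_width = 2, 3, 2
i = 1                                                # second batch
kindex = i * batch_size * num_steps
x = data.iloc[kindex:kindex + batch_size * num_steps].to_numpy()
x = x.reshape(batch_size, num_steps, seq_width)
print(x.shape)                                       # (2, 3, 2)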
Example #25
def attempt_add_level_process(data):
    """
    based on correctness and attempt relationship
    0 - attempt: 0 - 0
    1 - attempt: 1 - 81.7%
    2 - attempt: 2 -
    3 - attempt: 0 - 0
    """
    temp_list = []

    for item in pp.prog_percent(list(data['attempt_count']),
                                stream=sys.stdout,
                                title='==> cast attempt to attempt_level'):
        if item == 0:
            temp = 0
        elif item == 1:
            temp = 1
        else:
            temp = 2

        temp_list.append(temp)
    data['attempt_level'] = temp_list
    return data
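
For non-negative integer counts, the loop above is equivalent to clipping at 2. A one-line pandas sketch:

import pandas as pd

counts = pd.Series([0, 1, 5, 2, 0])
print(counts.clip(upper=2).tolist())   # [0, 1, 2, 2, 0], the same mapping as attempt_level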
Example #26
def Transfer_data(dataset, dp, ap):
    g = tf.Graph()
    with g.as_default():
        inputs = tf.placeholder(tf.float32,
                                [ap.batch_size, ap.num_steps, dp.seq_width])
        m = ONEHOTENCODERINPUT(ap, dp, inputs, printControl=False)

    with tf.Session(graph=g) as sess:
        iterations = int(len(dataset) / (ap.batch_size * ap.num_steps))
        dataset = dataset.to_numpy()
        x_sum = []
        for j in pyprind.prog_percent(range(iterations),
                                      title="transfer data"):
            tmpData = dataset[j * ap.batch_size * ap.num_steps:(j + 1) *
                              ap.batch_size * ap.num_steps, :]
            record_content = tmpData.reshape(
                [ap.batch_size, ap.num_steps, dp.seq_width])
            tmpResult = sess.run(m.get_init_value_for_train_weights(),
                                 feed_dict={inputs: record_content})
            if j == 0:
                x_sum = tmpResult
            else:
                x_sum = np.vstack([x_sum, tmpResult])
    return x_sum
Example #27
def test_generator():
    for i in pyprind.prog_percent(range(n), stream=sys.stdout):
        time.sleep(sleeptime)
Example #28
def time_add_level_process(data):
    time_interval = 0.025
    boundary_list = [0.5, 0.7]
    data = data.reset_index(drop=True)
    bins = np.arange(min(data['time']), max(data['time']), time_interval * 2)

    correct_mean_list = []
    correct_std_list = []
    correct_num_list = []
    for item_index in pp.prog_percent(range(len(bins)),
                                      stream=sys.stdout,
                                      title='==> get correctness'):
        up_bin = bins[item_index] + time_interval
        down_bin = bins[item_index] - time_interval

        temp_data = data[(data['time'] >= down_bin) & (data['time'] < up_bin)]
        temp_correct_list = list(temp_data['correct'])
        """
        if up_bin<=-1:
            print ("---"*20)
            print ("*\t",down_bin)
            print ("*\t",up_bin)
            print (temp_correct_list)
            #print (temp_data)
            print ("---"*20)
        """

        correct_num_list.append(len(temp_correct_list))
        if (len(temp_correct_list) != 0):
            if np.mean(temp_correct_list, axis=0) > 1:
                print("******\t", np.mean(temp_correct_list, axis=0), "\t",
                      temp_correct_list)
            correct_mean_list.append(np.mean(temp_correct_list, axis=0))
            correct_std_list.append(np.std(temp_correct_list, axis=0))
        else:
            correct_mean_list.append(0)
            correct_std_list.append(0)

    # plot the relationship
    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
    ax = axs[0]
    ax.plot(bins, correct_mean_list, "r.")
    ax.set_title('correctness')

    for nmber in boundary_list:
        ax.axhline(y=nmber, xmin=0, xmax=1, c="red", linewidth=0.5, zorder=0)

    ax = axs[1]
    ax.plot(bins, correct_num_list, "b--")
    ax.set_title("time z score distribution")

    ax.set_xlim([-2, 4])
    plt.savefig('./result/cmu_stat_f2011/time_distribution_correctness_' +
                str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) +
                '.png')
    #plt.show()

    # add a column according to correctness boundary
    time_level_list = []
    temp_list = list(data['time'])
    bd = [-1.2, -0.7, 0.75]

    # 0 ~        time < -1.2
    # 1 ~ -1.2 < time < -0.7
    # 2 ~ -0.7 < time < 0.75
    # 3 ~ 0.75 < time
    for idx in range(len(temp_list)):
        if temp_list[idx] <= bd[0]:
            time_level_list.append(0)
        elif (bd[0] < temp_list[idx] and temp_list[idx] <= bd[1]):
            time_level_list.append(1)
        elif (bd[1] < temp_list[idx] and temp_list[idx] <= bd[2]):
            time_level_list.append(2)
        elif (temp_list[idx] > bd[2]):
            time_level_list.append(3)
        else:
            raise Exception("Error in time division")
    print("==> add time_level")
    data['time_level'] = time_level_list
    return data
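
The chain of boundary checks above matches np.digitize with right=True. A minimal check using the bd values from this function:

import numpy as np

bd = [-1.2, -0.7, 0.75]
times = np.array([-2.0, -1.0, 0.0, 1.0])
print(np.digitize(times, bd, right=True))   # [0 1 2 3], matching time_level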
Example #29
print('\n%s' % (80 * '='))
print('%s\n' % (80 * '='))
print('Testing stdout Stream\n')

perc = pyprind.ProgPercent(n, stream=sys.stdout)
for i in range(n):
    perc.update()


print('\n%s' % (80 * '='))
print('%s\n' % (80 * '='))
print('Testing Percentage Indicator Generator\n')

for i in pyprind.prog_percent(range(n), stream=sys.stdout):
    # do something
    pass


print('\n%s' % (80 * '='))
print('%s\n' % (80 * '='))
print('Testing monitor function\n')

perc = pyprind.ProgPercent(n, monitor=True)
for i in range(n):
    perc.update()
print(perc)


print('\n%s' % (80 * '='))
Example #30
import sys
sys.path.append(CAFFE_ROOT + '/python/')
import caffe
import numpy as np
import pyprind

images = list(map(lambda x: IMAGES_PREFIX + x.split(',')[0] + '.jpg',
                  open(DATA_FILE, 'r').readlines()[1:]))
print(len(images))



NPY_FILENAME = 'pred_test_{}.npy'.format(SCALE)



caffe.set_mode_gpu()
net = caffe.Classifier(model_file=MODEL_FILENAME,
                       pretrained_file=PRETRAINED,
                       image_dims=[SCALE, SCALE],
                       raw_scale=255,
                       mean=np.array([104, 117, 123]),
                       channel_swap=(2,1,0))

preds = np.empty((len(images), 2))

for i in pyprind.prog_percent(range(0, preds.shape[0], BATCH_SIZE)):
    i2 = min(i+BATCH_SIZE, preds.shape[0])
    imgs = [caffe.io.load_image(img) for img in images[i:i2]]
    preds[i:i2, ...] = net.predict(imgs, oversample=True, interp_order=3)
np.save(NPY_FILENAME, preds)
Example #31
def run(args):
    if args.files:
        files = args.files
    elif args.listfile:
        with open(args.listfile) as f:
            files = list(map(str.strip, f.readlines()))
    else:
        parser.print_usage()
        sys.exit(0)

    failed = False
    for f in files:
        ext = os.path.splitext(f)
        if not os.path.exists(f):
            eprint('Error: %s does not exist.' % f)
            failed = True
        elif not (ext[1] == '.h5' or ext[1] == '.hdf5'):
            eprint('Error: %s is not a hdf5 file.' % f)
            failed = True
        elif not os.path.isfile(f):
            eprint('Error: %s is not a file.' % f)
            failed = True
    if failed:
        sys.exit(1)

    sub = None
    if args.substitute:
        split = args.substitute.split(':')
        if len(split) != 2:
            eprint(
                'Error: invalid substitution syntax "%s". Syntax is "replace:with".'
                % args.substitute)
            failed = True
        sub = split
    if args.destdir:
        if not os.path.isdir(args.destdir):
            eprint('Error: destination dir "%s" does not exist.' %
                   args.destdir)
            failed = True
    if failed:
        sys.exit(1)

    bases = []
    shifts_fns = []
    for f in files:
        base_name = os.path.splitext(f)[0]

        remove_suffix = '_aligned'
        if base_name.endswith(remove_suffix):
            base_name = base_name[:-len(remove_suffix)]

        shifts_fn = base_name + '_shifts.npy'
        shifts_fns.append(shifts_fn)
        if not args.no_shifts and not os.path.exists(shifts_fn):
            eprint('Error: "%s" does not exist.' % shifts_fn)
            failed = True

        if args.destdir:
            base_name = os.path.join(args.destdir, os.path.basename(base_name))
            bases.append(base_name)
        elif sub:
            if base_name.find(sub[0]) == -1:
                eprint(
                    'Error: filename "%s" does not contain "%s" for substitution.'
                    % (f, sub[0]))
                failed = True
            base_name = base_name.replace(*sub)
            bases.append(base_name)
        else:
            bases.append(base_name)
    if failed:
        sys.exit(1)

    necessary = []

    if not args.no_verbose:
        print('Arguments look good. This will be processed:')
    for f, b in zip(files, bases):
        this_necessary = not all([os.path.isfile(b + s)
                                  for s in SUFFIXES]) or args.overwrite
        necessary.append(this_necessary)

        if not args.no_verbose:
            print(('' if this_necessary else '[SKIP] ') + f)
            for suffix in SUFFIXES:
                print((' -> ' if this_necessary else '[ALREADY EXISTS] ') +
                      '%s%s' % (b, suffix))
            print()

    necessary_files = [
        (f, shifts_fn, b)
        for f, shifts_fn, b, n in zip(files, shifts_fns, bases, necessary) if n
    ]

    if len(necessary_files) == 0:
        print('Nothing to process.')
        sys.exit(0)

    template = segmentation.load_template()
    for f, shifts_fn, b in prog_percent(necessary_files):
        print(f)
        print('=' * len(f))

        try:
            base = b
            if not args.no_shifts:
                print('Loading shifts...')
                shifts = np.load(shifts_fn)
                shift_dists = np.sqrt(np.sum(np.square(shifts), axis=1))
            print('Loading stack...')
            stack = dd.io.load(f)
            print('Computing std...')

            if not args.no_shifts:
                invalid_frames = [
                    i for i in np.arange(len(stack))
                    if shift_dists[i] > args.shift_threshold
                ]
            else:
                invalid_frames = []

            valid_frames = segmentation.valid_frames(invalid_frames,
                                                     length=len(stack))
            std = segmentation.std(stack, valid_frames=valid_frames)
            print('Saving std...')
            io.save(base + STD_DEV_SUFFIX, std, spacing=io.SPACING_JAKOB)
            print('Finding rois...')
            rois = segmentation.find_rois_template(std, template=template)
            print('Saving rois...')
            np.save(base + ROIS_SUFFIX, rois)
            print('Getting traces...')
            traces = segmentation.get_traces(stack, rois, use_radius=5)
            print('Saving traces...')
            np.save(base + TRACES_SUFFIX, traces)

        except Exception as e:
            print('An exception occurred:')
            print(e)
Example #32
def run_epoch(session, m, students, eval_op, verbose=False):
    pred_prob = []
    actual_labels = []  # use for whole comparison

    skill_id_origin_list = []
    target_id_origin_list = []
    iteration = int(len(students) / m.batch_size)

    for i_iter in pyprind.prog_percent(range(iteration)):
        #bar.update(m.batch_size)
        x = np.zeros((m.batch_size, m.num_steps, m.seq_width))

        target_id = np.array([], dtype=np.int32)
        skill_id_origin = np.array([], dtype=np.int32)
        target_id_origin = np.array([], dtype=np.int32)
        target_correctness = []  # use for just a batch

        #load data for a batch
        # tuple format
        # 0: user_id
        # 1: record_numb
        # 2: data
        # 3: Target_Id
        # 4: correctness
        for i_batch in range(m.batch_size):
            student = students[i_iter * m.batch_size + i_batch]
            record_num = student[1]
            #record_content_pd = student[2].reset_index(drop=True)
            record_content = student[2].to_numpy()
            temp_skill_id_list = list(student[2]['skill_id'])
            skill_id = student[3]
            correctness = student[4]

            # construct data for training:
            # data ~ x
            # target_id ~ skill_id
            # target_correctness ~ correctness
            for i_recordNumb in range(record_num):
                if (i_recordNumb < m.num_steps):
                    x[i_batch,
                      i_recordNumb, :] = record_content[i_recordNumb, :]

                    if skill_id[i_recordNumb] in m.skill_set:
                        temp = i_batch * m.num_steps * m.skill_num + i_recordNumb * m.skill_num + skill_id[
                            i_recordNumb]
                        temp_i = skill_id[i_recordNumb]
                        temp_s = temp_skill_id_list[i_recordNumb]
                    else:
                        # same flattened (batch, step, skill) index as above, with skill id 0
                        temp = (i_batch * m.num_steps * m.skill_num +
                                i_recordNumb * m.skill_num + 0)
                        temp_i = 0
                        temp_s = temp_skill_id_list[i_recordNumb]

                    target_id = np.append(target_id, [[temp]])
                    target_id_origin = np.append(target_id_origin, [[temp_i]])
                    skill_id_origin = np.append(skill_id_origin, [[temp_s]])

                    target_correctness.append(int(correctness[i_recordNumb]))
                    actual_labels.append(int(correctness[i_recordNumb]))
                else:
                    break

            #test inter_skill and intra_skill
            """
            if (record_num<=m.num_steps):
                skill_id_origin = np.append(skill_id_origin,temp_skill_id_list)
            else:
                skill_id_origin = np.append(skill_id_origin,temp_skill_id_list[:m.num_steps])
            """
        pred, _ = session.run(
            [m.pred, eval_op],
            feed_dict={
                m.inputs: x,
                m.target_id: target_id,
                m.target_correctness: target_correctness
            })

        for s in skill_id_origin:
            skill_id_origin_list.append(s)

        for t in target_id_origin:
            target_id_origin_list.append(t)

        for p in pred:
            pred_prob.append(p)

    # print ("------------------len ",len(skill_id_origin_list),"\t",len(target_id_origin_list))
    # print (skill_id_origin_list[:100])
    # print (target_id_origin_list[:100])
    rmse, auc, r2 = get_evaluate_result(actual_labels, pred_prob)

    #print ("==> predict_prob shape\t",np.shape(pred_prob),'\tactual_labels\t',np.shape(actual_labels),'\ttarget_id_list\t',np.shape(target_id_origin_list))
    #print (target_id_origin_list[1:100])
    intra_skill_actual = []
    intra_skill_pred = []

    inter_skill_actual = []
    inter_skill_pred = []

    for idx in np.arange(len(target_id_origin_list)):
        if skill_id_origin_list[idx] == target_id_origin_list[idx]:
            intra_skill_actual.append(actual_labels[idx])
            intra_skill_pred.append(pred_prob[idx])
        else:
            inter_skill_actual.append(actual_labels[idx])
            inter_skill_pred.append(pred_prob[idx])

    inter_rmse, inter_auc, inter_r2 = get_evaluate_result(
        inter_skill_actual, inter_skill_pred)
    intra_rmse, intra_auc, intra_r2 = get_evaluate_result(
        intra_skill_actual, intra_skill_pred)

    return rmse, auc, r2, inter_rmse, inter_auc, inter_r2, intra_rmse, intra_auc, intra_r2
Example #33
def time_add_level_process(data):
    data = data.reset_index(drop=True)
    bins = np.arange(min(data['time']), max(data['time']),
                     code0.DatasetParameter().time_interval * 2)
    correct_mean_list = []
    correct_std_list = []
    correct_num_list = []
    for item_index in pp.prog_percent(range(len(bins)),
                                      stream=sys.stdout,
                                      title='==> get correctness'):
        up_bin = bins[item_index] + code0.DatasetParameter().time_interval
        down_bin = bins[item_index] - code0.DatasetParameter().time_interval

        temp_data = data[data['time'] >= down_bin]
        temp_data = temp_data[temp_data['time'] < up_bin]

        temp_correct_list = list(temp_data['correct'])
        correct_num_list.append(len(temp_correct_list))
        if (len(temp_correct_list) != 0):
            correct_mean_list.append(np.mean(temp_correct_list, axis=0))
            correct_std_list.append(np.std(temp_correct_list, axis=0))
        else:
            correct_mean_list.append(0)
            correct_std_list.append(0)

    # plot the relationship
    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
    ax = axs[0]
    ax.plot(bins, correct_mean_list)
    ax.set_title('correctness')
    boundary_list = code0.DatasetParameter().correct_boundary_list
    for nmber in boundary_list:
        ax.axhline(y=nmber, xmin=0, xmax=1, c="red", linewidth=0.5, zorder=0)

    ax = axs[1]
    ax.plot(bins, correct_num_list)
    ax.set_title("time z score distribution")

    ax.set_xlim([-2, 4])
    plt.savefig('./result/assistment2009/time_distribution_correctness_' +
                str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) +
                '.png')
    # plt.show()

    # add a column according to correctness boundary
    time_level_list = []
    temp_list = list(data['time'])
    bd = code0.DatasetParameter().time_boundary_list
    # 0 ~        time <-0.8
    # 1 ~ -0.8 < time < -0.6
    # 2 ~ -0.6 < time < 0
    # 3 ~    0 < time
    for idx in range(len(temp_list)):
        if temp_list[idx] <= bd[0]:
            time_level_list.append(0)
        elif (bd[0] < temp_list[idx] and temp_list[idx] <= bd[1]):
            time_level_list.append(1)
        elif (bd[1] < temp_list[idx] and temp_list[idx] <= bd[2]):
            time_level_list.append(2)
        elif (temp_list[idx] > bd[2]):
            time_level_list.append(3)
        else:
            raise Exception("Error in time division")

    data['time_level'] = time_level_list
    return data