def attempt_and_hint_process(data):
    print('==> remove records whose attempt_count is more than 15')
    data = data[data['attempt_count'] <= 15]
    data = data.reset_index(drop=True)
    problem_list = np.unique(data['problem_id'])
    attempt_dict = {}
    hint_dict = {}
    attempt_list = []
    hint_list = []
    for idx in pp.prog_percent(
            range(len(problem_list)),
            stream=sys.stdout,
            title='==> get attempt and hint max value at problem level'):
        temp_data = data[data['problem_id'] == problem_list[idx]]
        attempt_dict[problem_list[idx]] = max(temp_data['attempt_count'])
        attempt_list.append(max(temp_data['attempt_count']))
        hint_dict[problem_list[idx]] = max(temp_data['hint_count'])
        hint_list.append(max(temp_data['hint_count']))
    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=False)
    ax = axs[0]
    ax.hist(attempt_list, bins=np.arange(0, 16, 1))
    ax.set_title('max attempt distribution')
    ax.set_xlabel("attempt(max)")
    ax.set_ylabel("number")
    ax = axs[1]
    ax.hist(hint_list)
    ax.set_title("max hint distribution")
    ax.set_xlabel("hint(max)")
    ax.set_ylabel("number")
    plt.savefig('./result/assistment2009/attempt_hint_number_' +
                datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.png')
    for idx in pp.prog_percent(
            range(len(data)),
            stream=sys.stdout,
            title='==> cast attempt count and hint count to value/max'):
        if attempt_dict[data.loc[idx, 'problem_id']] == 0:
            data.loc[idx, 'attempt_count_level'] = -1
        else:
            data.loc[idx, 'attempt_count_level'] = data.loc[idx, 'attempt_count'] / (
                attempt_dict[data.loc[idx, 'problem_id']] * 1.0)
        if hint_dict[data.loc[idx, 'problem_id']] == 0:
            data.loc[idx, 'hint_count_level'] = -1
        else:
            data.loc[idx, 'hint_count_level'] = data.loc[idx, 'hint_count'] / (
                hint_dict[data.loc[idx, 'problem_id']] * 1.0)
    return data
def normalization_continues_data(data):
    print('==> normalize continuous data')
    columns_name_list = ["attempt_count", "time", "hint_count"]
    data = data.reset_index(drop=True)
    size = len(data)
    for column_name in columns_name_list:
        if column_name == "time":
            bins = [-1, 60, 300, 1200, 3600, 60000000]
            data[column_name] = pd.cut(data[column_name], bins, labels=False)
            tmpList = []
            for i in pyprind.prog_percent(range(size),
                                          stream=sys.stdout,
                                          title=column_name):
                try:
                    tmp = int(data.loc[i, column_name])
                except:
                    tmp = 0
                    # raise ValueError(str(data.loc[i, column_name]) + "_" + str(i))
                tmpList.append(math.log((tmp + 2), 6))
            data['time_normal'] = tmpList
        elif column_name == "attempt_count":
            bins = [-10, 1, 20, 100, 40000]
            data[column_name] = pd.cut(data[column_name], bins, labels=False)
            data[column_name] += 1
            tmpList = []
            for i in pyprind.prog_percent(range(size),
                                          stream=sys.stdout,
                                          title=column_name):
                # print("attempt_count\t", str(i))
                tmp = int(data.loc[i, column_name])
                tmpList.append(math.log((tmp + 1), 5))
            data['attempt_count_normal'] = tmpList
        elif column_name == "hint_count":
            bins = [-1, 0, 2, 4, 3000]
            data[column_name] = pd.cut(data[column_name], bins, labels=False)
            data[column_name] += 1
            tmpList = []
            for i in pyprind.prog_percent(range(size),
                                          stream=sys.stdout,
                                          title=column_name):
                try:
                    tmp = int(data.loc[i, column_name])
                except:
                    tmp = 0
                tmpList.append(math.log((tmp + 1), 5))
            data['hint_count_normal'] = tmpList
        else:
            raise ValueError("check your continus_columns parameter!")
    return data
def add_cross_feature_to_dataset(dataset, dp):
    if len(dp.dataset_columns_for_cross_feature) == 0:
        print("==> no need to add cross feature to dataset")
        return dataset
    else:
        print("==> add cross feature to dataset")
        columns_max, columns_numb, _ = get_columns_info(dataset)
        d_size = len(dataset)
        for item in dp.dataset_columns_for_cross_feature:
            print("==> add", aux.connectStringfromList(item))
            temp = []
            for i in pyprind.prog_percent(range(d_size),
                                          stream=sys.stdout,
                                          title=item):
                if len(item) == 2:
                    value = dataset.loc[i, item[0]] + \
                        dataset.loc[i, item[1]] * (columns_max[item[0]] + 1)
                elif len(item) == 3:
                    value = dataset.loc[i, item[0]] + \
                        dataset.loc[i, item[1]] * (columns_max[item[0]] + 1) + \
                        dataset.loc[i, item[2]] * (columns_max[item[0]] + 1) * (columns_max[item[1]] + 1)
                else:
                    raise ValueError('cross features only support 3 at most')
                temp.append(value)
            dataset[aux.connectStringfromList(item)] = temp
        return dataset
def connectUser(data, connected_file_name):
    print("==> load data successful")
    u, c = counter(data['user_id'])
    # UserNumberDict = dict(zip(u, c))
    userQuesNumIndexList = getUserQuesNumIndexList(data['user_id'])
    newdata = pd.DataFrame()
    print('==> begin concatenate dataset')
    for i in pp.prog_percent(range(len(u)), stream=sys.stdout):
        for k in range(len(userQuesNumIndexList)):
            if userQuesNumIndexList[k, 0] == u[i]:
                temp = data.iloc[int(userQuesNumIndexList[k, 2]):int(
                    userQuesNumIndexList[k, 2] + userQuesNumIndexList[k, 1])]
                newdata = newdata.append(temp)
    # reset_index is not in-place; reassign so the cleaned index is kept
    newdata = newdata.reset_index(drop=True)
    newdata.to_csv(connected_file_name, index=False)
    print('==> before connect\t',
          aux.stastic_SecNumber_UserNumber_SkillNumber(data, code0.DatasetParameter()))
    print('==> after connect\t',
          aux.stastic_SecNumber_UserNumber_SkillNumber(newdata, code0.DatasetParameter()))
    return newdata
def orthogonal_averaged(pix_map, traces, ts, shape, contribution_f=None, fill_value=0):
    if not contribution_f:
        contribution_f = contribution_function
    frame_involved, frame_dists, pixel_list = pix_map
    # improves ETA calculation
    random.shuffle(pixel_list)
    contributions = np.zeros(frame_dists.shape, dtype=object)
    for p in pixel_list:
        contributions[p] = [contribution_f(d) for d in frame_dists[p]]
        contributions[p] /= np.sum(contributions[p])
    # frame = np.zeros(shape)
    # dim = len(shape)
    # sel = tuple(np.array(pixel_list).T)
    video = np.full((len(ts),) + shape, fill_value, dtype=np.float32)
    sl = (slice(np.alen(video)),)
    for p in prog_percent(pixel_list):
        # frame[p] = np.sum(traces[frame_involved[p], ts]*contributions[p])
        traces_in_p = traces[frame_involved[p]][:, ts]
        traces_weighed = (traces_in_p.T * contributions[p]).T
        video[sl + p] = np.sum(traces_weighed, axis=0)
        # does not work
        # frame[sel] = np.sum(traces[frame_involved[sel], t]*contributions[sel], axis=dim)
    return video
def BuildIndex(self):
    def bigram(title):
        bigram = (title.split(',')[0], title.split(',')[1].replace('.', ''))
        title = re.sub(r'\(.*\)', '', title.split(',')[0]).split()[0].strip()
        bigram += (title, )
        if len(title) > 2:
            prefix = title[0]
            for i in range(1, len(title)):
                if title[i:].count(title[i]) == 1:
                    bigram += (prefix + title[i], )
        return bigram

    tmp = dict()
    for i in pyprind.prog_percent(Course.objects.all()):
        key = bigram(i.title)
        titleTerms = self.title2terms(i.title)
        CourseCode = i.code
        for k in key:
            tmp.setdefault(k, set()).add(CourseCode)
        for t in titleTerms:
            tmp.setdefault(t, set()).add(CourseCode)
        tmp.setdefault(i.professor, set()).add(CourseCode)
        tmp.setdefault(CourseCode, set()).add(CourseCode)
    result = tuple({'key': key, self.school: list(value)}
                   for key, value in tmp.items()
                   if key != '' and key is not None)
    self.SrchCollect.remove({})
    self.SrchCollect.insert(result)
    self.SrchCollect.create_index([("key", pymongo.HASHED)])
def register_whole_stack(self,
                         save_path="",
                         max_cell_radius=5,
                         cell_size_iterations=5,
                         cell_rel_threshold=0.4,
                         min_cell_intensity=20,
                         cell_overlap=0):
    # disp = []
    # self.aligned_frame = [None] * self.nt
    self.displacement = np.empty([self.nt, 3])
    self.registered_stack = np.empty((self.nt, self.nz, self.nx, self.ny),
                                     dtype=np.uint16)
    self._prepare_std_deviation_and_invalid_frames_and_result()
    self._prepare_invalid_frames()
    register_queue = self._create_queue(self._align_frame_worker,
                                        self.thread_count * 2)
    for t in prog_percent(range(self.nt)):
        register_queue.put([self.read_frame(t), t, self.registered_stack[t], t])
    register_queue.join()
    return self.displacement, self.registered_stack
def plane_wise(arguments, z_range=None, print_output=False):
    image = io.load(arguments.input_file)
    ref = io.load(arguments.reference)
    r1 = util.randomword(20)
    r2 = util.randomword(20)
    tmpf = tempfile.gettempdir()

    def plane_name(r, z):
        return os.path.join(tmpf, r + '_z%d.nrrd' % z)

    if not z_range:
        z_range = np.arange(0, image.shape[0])
    warped = []
    results = []
    for z in prog_percent(z_range):
        inp_plane = image[z]
        ref_plane = ref[z]
        inp_fn = plane_name(r1, z)
        ref_fn = plane_name(r2, z)
        io.save(inp_fn, inp_plane)
        io.save(ref_fn, ref_plane)
        arguments_z = copy.deepcopy(arguments)
        arguments_z.input_file = inp_fn
        arguments_z.reference = ref_fn
        arguments_z.dimensions = 2
        result = run_antsreg(arguments_z)
        results.append(result)
        warped.append(result.load_warped())
    return img.cmp_images(warped), results
def run_gridsearch(*args, **kwargs):
    f, configs = grid_search(*args, **kwargs)
    results = []
    for i, c in prog_percent(list(enumerate(configs))):
        result = f(c)
        results.append(result)
    return results
def planewise_affine(fixed, moving, return_transforms=False):
    zshift = get_zshift(fixed, moving)
    fixed = to_numpy(fixed)
    moving = to_numpy(moving)
    size_z = fixed.shape[0]
    warped = np.zeros_like(fixed)
    transforms = [None] * size_z
    for z in prog_percent(list(range(max((0, -zshift)),
                                     min((size_z, -zshift + size_z))))):
        mov = ants.from_numpy(moving[z + zshift].swapaxes(0, 1))
        fix = ants.from_numpy(fixed[z].swapaxes(0, 1))
        res = ants.registration(mov, fix,
                                type_of_transform='Affine',
                                reg_iterations=[500, 500, 500],
                                grad_step=.1,
                                verbose=True)
        t = ants.read_transform(res['fwdtransforms'][0])
        transforms[z] = t
        trans = ants.apply_ants_transform_to_image(t, mov, fix)
        warped[z] = trans.numpy().swapaxes(0, 1)
    if return_transforms:
        return warped, (transforms, zshift)
    return warped
def run(self):
    if not self.prepared:
        self.prepare()
    frame_stack = np.zeros(self.lif_shape, np.uint16)
    ts = list(range(self.nf))
    for f in prog_percent(ts):
        for z in range(self.lif_shape[1]):
            plane = self.ir.read(z=z, t=f, c=0, series=self.lif_idx, rescale=False)
            frame_stack[f, z] = toCPU(self.alignPlaneGPU(toGPU(plane), z, f))
    """
    self.frameCount -= len(self.invalidFrames)
    self.stdDeviation = toCPU(th.sqrt((self.sumSqTensor[z] - self.sumTensor[z]**2/self.frameCount)/(self.frameCount-1)))
    """
    # stdDevDiffTensor = toGPU(sumSqStdDev[z]) + sumSqTensor[z] - (toGPU(sumStdDev[z]) + sumTensor[z])**2/frameCount
    # stdDeviation = toCPU(th.sqrt(stdDevDiffTensor**2/(frameCount-1)))
    return dict(aligned=frame_stack, shifts=self.shifts)
def time_basic_process(data):
    # -1- transfer to second unit
    print("==> transfer time unit: millisecond to second")
    tempTimeList = list(data['time'])
    newTimeList = [int(x / 1000) for x in tempTimeList]
    data['time'] = newTimeList
    del newTimeList, tempTimeList
    # -2- remove outlier records
    print('==> delete outlier of time feature')
    print('==> length before delete\t', len(data))
    data = data[(data['time'] <= code0.DatasetParameter().time_threshold)
                & (data['time'] > 0)]
    print('==> length after delete\t', len(data))
    # -3- transfer to z-score
    time_z_level = code0.DatasetParameter().time_z_level
    print('==> preprocess time to z-score based on ', time_z_level)
    time_z_id_set = np.unique(data[time_z_level])
    std_dict = {}
    mean_dict = {}
    for itme_id in pp.prog_percent(time_z_id_set,
                                   stream=sys.stdout,
                                   title='==> extract mean and std of time'):
        temp_data = data[data[time_z_level] == itme_id]
        temp_list = list(temp_data['time'])
        # print('-- problem_id ', problem_id, ' -- ', len(temp_list), ' --')
        std_dict[itme_id] = np.std(temp_list, axis=0)
        mean_dict[itme_id] = np.mean(temp_list, axis=0)
    assert len(std_dict) == len(mean_dict)
    data = data.reset_index(drop=True)
    for id in pp.prog_percent(range(len(data)),
                              stream=sys.stdout,
                              title='==> cast time to z-score'):
        data.loc[id, 'time'] = (data.loc[id, 'time'] -
                                mean_dict[data.loc[id, time_z_level]]) / (
                                    std_dict[data.loc[id, time_z_level]] * 1.0)
    data = data.fillna(0)
    """
    plt.hist(list(data['time']),
             bins=np.arange(min(data['time']), max(data['time']),
                            code0.DatasetParameter().time_interval * 2))
    plt.title("time z score distribution")
    plt.savefig('./result/assistment2009/time_distribution' +
                str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) + '.png')
    """
    return data
def orthogonal(rois, traces, color_func, ts, shape):
    activity = np.zeros((np.alen(ts),) + tuple(shape) + (3,), dtype=np.uint8)
    for i, t in prog_percent(list(enumerate(ts))):
        for roi_id, (roi, trace) in enumerate(zip(rois, traces)):
            x, y, z, _ = roi
            activity[i, y, x] = color_func(trace[t])
    return activity
def slice_series(fn, z, ts):
    for new_t, t in prog_percent(list(enumerate(ts))):
        frame = io.get_frame(fn, t)
        if new_t == 0:
            new_shape = list(frame.shape)[1:]
            new_shape = [len(ts)] + new_shape
            new = np.zeros(new_shape)
        new[new_t, :, :] = frame[z]
    return new
def cut_series(fn, ts):
    for new_t, t in prog_percent(list(enumerate(ts))):
        frame = io.get_frame(fn, t)
        if new_t == 0:
            new_shape = list(frame.shape)
            new_shape.insert(1, len(ts))
            new = np.zeros(new_shape)
        new[:, new_t, :, :] = frame
    return new
def prediction(model_0, data_test, label_test):
    well_sorted = 0
    model_0 = load_model('%s/clusters_saves/iteration_0/save_cluster_0/save' %
                         root_path)  # model 0
    list_dir_iteration = sorted(os.listdir(cluster_save_dir),
                                key=lambda k: int(k.split("_")[-1]))
    number_images = len(data_test)
    for row in pyprind.prog_percent(range(0, number_images)):  # for each image
        Y_prob = model_0.predict(data_test[row][np.newaxis, :])
        Y_classes = Y_prob.argmax(
            axis=1)  # predicts the image label based on the best probability
        label_name = all_label_names[all_label_numbers.index(Y_classes)]
        for iteration in list_dir_iteration:  # for each iteration directory
            species_found = False
            list_clusters = sorted(os.listdir(root_path + '/clusters_saves' + '/' +
                                              iteration),
                                   key=lambda k: int(k.split("_")[-1]))
            for cluster in list_clusters:  # for each cluster in the iteration directory
                if not species_found:
                    cluster_species = np.genfromtxt(
                        root_path + '/clusters_saves' + '/' + iteration + '/' +
                        cluster + '/labels.csv',
                        dtype=None,
                        encoding=None)[1:]
                    if label_name in cluster_species:
                        species_found = True
                        model = load_model(root_path + '/clusters_saves' + '/' +
                                           iteration + '/' + cluster + '/save')
                        Y_prob = model.predict(data_test[row][np.newaxis, :])
                        Y_classes = Y_prob.argmax(
                            axis=1)  # predicts the image label based on the best probability
                        ID = int(cluster.split('_')[-1])
                        label_name = all_index_with_ID[
                            all_index_with_ID.index(ID) + 1][Y_classes][1]  # image's label name
        true_label = np.where(
            label_test[row][np.newaxis, :] == 1)[1][0]  # theoretical label
        true_label_name = all_label_names[all_label_numbers.index(true_label)]
        if true_label_name == label_name:
            well_sorted += 1
        if not row == 0:
            print(well_sorted / row)  # progress
    acc = well_sorted / number_images
    return acc
def loadfiles(self):
    stack = []
    for fn in prog_percent(self.files):
        im = cv2.imread(fn, 0).astype(np.float32)
        if im.shape == (488, 648):
            im = im[4:-4, 4:-4]
        stack.append(im)
    return np.array(stack, dtype=np.float32)
def score_results(results, metric='MI[$REF_IN,1,32,Regular,0.25]'):
    def func(res):
        warped = res.get_warped()
        ref = res.arguments.reference
        sim = measure_similarity(ref, warped, metric)
        return sim, res

    print('Scoring')
    scored = list(map(func, prog_percent(results)))
    scored.sort(key=lambda e: e[0])
    return scored
def main(argv):
    folder = '/Users/koesterlab/registered/control/'
    files = glob(folder + '*_aligned.h5')
    # files = fileinput.input()
    for f in prog_percent(files):
        try:
            r = re.compile(r'^' + folder + '(?P<fn>.*)_aligned.h5')
            m = r.match(f)
            fn = m.group('fn')
            process_file(folder + fn)
        except Exception as e:
            print(e)
def lif_read_stack(fn):
    ir = lif_open(fn)
    img_i = lif_find_timeseries(fn)
    shape = get_shape(fn, img_i)
    stack = np.empty(shape, dtype=np.uint16)
    # Load the whole stack...
    for t in prog_percent(range(stack.shape[0])):
        for z in range(stack.shape[1]):
            stack[t, z] = ir.read(t=t, z=z, c=0, series=img_i, rescale=False)
    return stack
def time_basic_process(data):
    # -1- transfer time to 'integer' from 'str'
    # -2- remove outlier records
    old_time_list = list(data['time'])
    new_time_list = []
    for i in old_time_list:
        kp = int(float(i))
        if kp > 150:
            kp = 150
        new_time_list.append(kp)
    data['time'] = new_time_list
    # -3- transfer to z-score
    time_z_level = 'skill_id'
    print('==> preprocess time to z-score based on ', time_z_level)
    time_z_id_set = np.unique(data[time_z_level])
    std_dict = {}
    mean_dict = {}
    for itme_id in pp.prog_percent(time_z_id_set,
                                   stream=sys.stdout,
                                   title='==> extract mean and std of time'):
        temp_data = data[data[time_z_level] == itme_id]
        temp_list = list(temp_data['time'])
        # print('-- problem_id ', problem_id, ' -- ', len(temp_list), ' --')
        std_dict[itme_id] = np.std(temp_list, axis=0)
        mean_dict[itme_id] = np.mean(temp_list, axis=0)
    assert len(std_dict) == len(mean_dict)
    data = data.reset_index(drop=True)
    for id in pp.prog_percent(range(len(data)),
                              stream=sys.stdout,
                              title='==> cast time to z-score'):
        data.loc[id, 'time'] = (data.loc[id, 'time'] -
                                mean_dict[data.loc[id, time_z_level]]) / (
                                    std_dict[data.loc[id, time_z_level]] * 1.0)
    return data
def attemp_hint_and_correctness_analysis(data):
    data = data.reset_index(drop=True)
    bins = np.concatenate([[-1], np.arange(0.0, 1.1, 0.1)])
    for attri in ['hint_count_level', 'attempt_count_level']:
        correct_mean_list = []
        correct_std_list = []
        correct_num_list = []
        for item_index in pp.prog_percent(
                range(len(bins)),
                stream=sys.stdout,
                title='==> get correctness according to ' + attri):
            up_bin = bins[item_index] + 0.05
            down_bin = bins[item_index] - 0.05
            temp_data = data[(data[attri] >= down_bin) & (data[attri] < up_bin)]
            temp_correct_list = list(temp_data['correct'])
            correct_num_list.append(len(temp_correct_list))
            if len(temp_correct_list) != 0:
                correct_mean_list.append(np.mean(temp_correct_list, axis=0))
                correct_std_list.append(np.std(temp_correct_list, axis=0))
            else:
                correct_mean_list.append(0)
                correct_std_list.append(0)
        fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
        ax = axs[0]
        ax.plot(bins, correct_mean_list)
        ax.set_title('correctness ' + attri)
        boundary_list = code0.DatasetParameter().correct_boundary_list
        for nmber in boundary_list:
            ax.axhline(y=nmber, xmin=0, xmax=1, c="red", linewidth=0.5, zorder=0)
        ax = axs[1]
        ax.plot(bins, correct_num_list)
        ax.set_title(attri + " number distribution")
        ax.set_xlim([-1.1, 1.1])
        plt.savefig('./result/assistment2009/' + attri + '_correctness_' +
                    str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) + '.png')
def run(self, number_of_runs=1, max_generations=None):
    # Some Stats
    number_lost = 0
    num_of_generations = []
    if max_generations is not None:
        self.max_generations = max_generations
    graph.ylabel('Proportion of p')
    graph.xlabel('Number of Generations')
    # Selection With Drift
    for i in pyprind.prog_percent(range(number_of_runs)):
        results, lost = self.single_run()
        num_of_generations.append(len(results))
        number_lost += lost
        graph.plot(np.array(results) / self.allele_pool_size,
                   linewidth=1.0,
                   alpha=0.4)
    num_of_generations = np.array(num_of_generations)
    graph.plot([0, num_of_generations.max()], [1, 1],
               linewidth=1.0,
               linestyle='--',
               label='Fixation Point',
               color='k')
    # Selection Alone (Benchmark)
    graph.plot(np.array(
        self.selection_only_benchmark(break_point=num_of_generations.max())) /
               self.allele_pool_size,
               linewidth=1.5,
               color='k',
               label='Selection Only (Non-Discrete)')
    graph.title('{} Simulation ({}% of runs lost allele p)'.format(
        self.type_of_trait, round(number_lost * 100 / number_of_runs, 2)))
    graph.legend(loc=4)
    graph.xlim(0, num_of_generations.max())
    graph.ylim(0, 1.05)
    graph.show()
    return num_of_generations, number_lost
def run_ae_epoch(sess, model, data, TrainConfig):
    batch_number = int(len(data) / (TrainConfig.batch_size * TrainConfig.num_steps))
    learning_rate = TrainConfig.learning_rate
    for i in pyprind.prog_percent(range(batch_number), stream=sys.stdout):
        x = np.zeros((TrainConfig.batch_size, TrainConfig.num_steps,
                      TrainConfig.seq_width))
        kindex = i * (TrainConfig.batch_size * TrainConfig.num_steps)
        for ip in range(TrainConfig.batch_size):
            for j in range(TrainConfig.num_steps):
                x[ip, j, :] = data.iloc[kindex]
                kindex += 1
        # mask_np = np.random.binomial(1, 1 - TrainConfig.corruption_level,
        #                              [TrainConfig.batch_size * TrainConfig.num_steps,
        #                               TrainConfig.seq_width])
        learning_rate = learning_rate * TrainConfig.lr_decay
        if learning_rate <= TrainConfig.min_lr:
            learning_rate = TrainConfig.min_lr
        _ = sess.run(model.optimizer, feed_dict={model.inputs: x})
        avgcost = sess.run(model.avgcost, feed_dict={model.inputs: x})
    return avgcost
def attempt_add_level_process(data):
    """
    based on correctness and attempt relationship
    0 - attempt: 0 - 0
    1 - attempt: 1 - 81.7%
    2 - attempt: 2 -
    3 - attempt: 0 - 0
    """
    temp_list = []
    for item in pp.prog_percent(list(data['attempt_count']),
                                stream=sys.stdout,
                                title='==> cast attempt to attempt_level'):
        if item == 0:
            temp = 0
        elif item == 1:
            temp = 1
        else:
            temp = 2
        temp_list.append(temp)
    data['attempt_level'] = temp_list
    return data
def Transfer_data(dataset, dp, ap):
    g = tf.Graph()
    with g.as_default():
        inputs = tf.placeholder(tf.float32,
                                [ap.batch_size, ap.num_steps, dp.seq_width])
        m = ONEHOTENCODERINPUT(ap, dp, inputs, printControl=False)
    with tf.Session(graph=g) as sess:
        iterations = int(len(dataset) / (ap.batch_size * ap.num_steps))
        dataset = dataset.as_matrix()
        x_sum = []
        for j in pyprind.prog_percent(range(iterations), title="transfer data"):
            tmpData = dataset[j * ap.batch_size * ap.num_steps:(j + 1) *
                              ap.batch_size * ap.num_steps, :]
            record_content = tmpData.reshape(
                [ap.batch_size, ap.num_steps, dp.seq_width])
            tmpResult = sess.run(m.get_init_value_for_train_weights(),
                                 feed_dict={inputs: record_content})
            if j == 0:
                x_sum = tmpResult
            else:
                x_sum = np.vstack([x_sum, tmpResult])
    return x_sum
def test_generator():
    for i in pyprind.prog_percent(range(n), stream=sys.stdout):
        time.sleep(sleeptime)
def time_add_level_process(data):
    time_interval = 0.025
    boundary_list = [0.5, 0.7]
    data = data.reset_index(drop=True)
    bins = np.arange(min(data['time']), max(data['time']), time_interval * 2)
    correct_mean_list = []
    correct_std_list = []
    correct_num_list = []
    for item_index in pp.prog_percent(range(len(bins)),
                                      stream=sys.stdout,
                                      title='==> get correctness'):
        up_bin = bins[item_index] + time_interval
        down_bin = bins[item_index] - time_interval
        temp_data = data[(data['time'] >= down_bin) & (data['time'] < up_bin)]
        temp_correct_list = list(temp_data['correct'])
        """
        if up_bin <= -1:
            print("---" * 20)
            print("*\t", down_bin)
            print("*\t", up_bin)
            print(temp_correct_list)
            # print(temp_data)
            print("---" * 20)
        """
        correct_num_list.append(len(temp_correct_list))
        if len(temp_correct_list) != 0:
            if np.mean(temp_correct_list, axis=0) > 1:
                print("******\t", np.mean(temp_correct_list, axis=0), "\t",
                      temp_correct_list)
            correct_mean_list.append(np.mean(temp_correct_list, axis=0))
            correct_std_list.append(np.std(temp_correct_list, axis=0))
        else:
            correct_mean_list.append(0)
            correct_std_list.append(0)
    # plot the relationship
    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
    ax = axs[0]
    ax.plot(bins, correct_mean_list, "r.")
    ax.set_title('correctness')
    for nmber in boundary_list:
        ax.axhline(y=nmber, xmin=0, xmax=1, c="red", linewidth=0.5, zorder=0)
    ax = axs[1]
    ax.plot(bins, correct_num_list, "b--")
    ax.set_title("time z score distribution")
    ax.set_xlim([-2, 4])
    plt.savefig('./result/cmu_stat_f2011/time_distribution_correctness_' +
                str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) + '.png')
    # plt.show()
    # add a column according to correctness boundary
    time_level_list = []
    temp_list = list(data['time'])
    bd = [-1.2, -0.7, 0.75]
    # 0 ~ time <= -1.2
    # 1 ~ -1.2 < time <= -0.7
    # 2 ~ -0.7 < time <= 0.75
    # 3 ~ 0.75 < time
    for idx in range(len(temp_list)):
        if temp_list[idx] <= bd[0]:
            time_level_list.append(0)
        elif bd[0] < temp_list[idx] <= bd[1]:
            time_level_list.append(1)
        elif bd[1] < temp_list[idx] <= bd[2]:
            time_level_list.append(2)
        elif temp_list[idx] > bd[2]:
            time_level_list.append(3)
        else:
            raise Exception("Error in time division")
    print("==> add time_level")
    data['time_level'] = time_level_list
    return data
print('\n%s' % (80 * '='))
print('%s\n' % (80 * '='))
print('Testing stdout Stream\n')
perc = pyprind.ProgPercent(n, stream=sys.stdout)
for i in range(n):
    perc.update()

print('\n%s' % (80 * '='))
print('%s\n' % (80 * '='))
print('Testing Percentage Indicator Generator\n')
for i in pyprind.prog_percent(range(n), stream=sys.stdout):
    # do something
    pass

print('\n%s' % (80 * '='))
print('%s\n' % (80 * '='))
print('Testing monitor function\n')
perc = pyprind.ProgPercent(n, monitor=True)
for i in range(n):
    perc.update()
print(perc)

print('\n%s' % (80 * '='))
import sys
sys.path.append(CAFFE_ROOT + '/python/')
import caffe
import pyprind

images = list(map(lambda x: IMAGES_PREFIX + x.split(',')[0] + '.jpg',
                  open(DATA_FILE, 'r').readlines()[1:]))
print(len(images))

NPY_FILENAME = 'pred_test_{}.npy'.format(SCALE)

caffe.set_mode_gpu()
net = caffe.Classifier(model_file=MODEL_FILENAME,
                       pretrained_file=PRETRAINED,
                       image_dims=[SCALE, SCALE],
                       raw_scale=255,
                       mean=np.array([104, 117, 123]),
                       channel_swap=(2, 1, 0))

preds = np.empty((len(images), 2))
for i in pyprind.prog_percent(range(0, preds.shape[0], BATCH_SIZE)):
    i2 = min(i + BATCH_SIZE, preds.shape[0])
    imgs = [caffe.io.load_image(img) for img in images[i:i2]]
    preds[i:i2, ...] = net.predict(imgs, oversample=True, interp_order=3)
np.save(NPY_FILENAME, preds)
def run(args):
    if args.files:
        files = args.files
    elif args.listfile:
        with open(args.listfile) as f:
            files = list(map(str.strip, f.readlines()))
    else:
        parser.print_usage()
        sys.exit(0)

    failed = False
    for f in files:
        ext = os.path.splitext(f)
        if not os.path.exists(f):
            eprint('Error: %s does not exist.' % f)
            failed = True
        elif not (ext[1] == '.h5' or ext[1] == '.hdf5'):
            eprint('Error: %s is not a hdf5 file.' % f)
            failed = True
        elif not os.path.isfile(f):
            eprint('Error: %s is not a file.' % f)
            failed = True
    if failed:
        sys.exit(1)

    sub = None
    if args.substitute:
        split = args.substitute.split(':')
        if len(split) != 2:
            eprint('Error: invalid substitution syntax "%s". Syntax is "replace:with".'
                   % args.substitute)
            failed = True
        sub = split
    if args.destdir:
        if not os.path.isdir(args.destdir):
            eprint('Error: destination dir "%s" does not exist.' % args.destdir)
            failed = True
    if failed:
        sys.exit(1)

    bases = []
    shifts_fns = []
    for f in files:
        base_name = os.path.splitext(f)[0]
        remove_suffix = '_aligned'
        if base_name.endswith(remove_suffix):
            base_name = base_name[:-len(remove_suffix)]
        shifts_fn = base_name + '_shifts.npy'
        shifts_fns.append(shifts_fn)
        if not args.no_shifts and not os.path.exists(shifts_fn):
            eprint('Error: "%s" does not exist.' % shifts_fn)
            failed = True
        if args.destdir:
            base_name = os.path.join(args.destdir, os.path.basename(base_name))
            bases.append(base_name)
        elif sub:
            if base_name.find(sub[0]) == -1:
                eprint('Error: filename "%s" does not contain "%s" for substitution.'
                       % (f, sub[0]))
                failed = True
            base_name = base_name.replace(*sub)
            bases.append(base_name)
        else:
            bases.append(base_name)
    if failed:
        sys.exit(1)

    necessary = []
    if not args.no_verbose:
        print('Arguments look good. This will be processed:')
    for f, b in zip(files, bases):
        this_necessary = not all([os.path.isfile(b + s)
                                  for s in SUFFIXES]) or args.overwrite
        necessary.append(this_necessary)
        if not args.no_verbose:
            print(('' if this_necessary else '[SKIP] ') + f)
            for suffix in SUFFIXES:
                print((' -> ' if this_necessary else '[ALREADY EXISTS] ') +
                      '%s%s' % (b, suffix))
            print()

    necessary_files = [(f, shifts_fn, b)
                       for f, shifts_fn, b, n in zip(files, shifts_fns, bases, necessary)
                       if n]
    if len(necessary_files) == 0:
        print('Nothing to process.')
        sys.exit(0)

    template = segmentation.load_template()

    for f, shifts_fn, b in prog_percent(necessary_files):
        print(f)
        print('=' * len(f))
        try:
            base = b
            if not args.no_shifts:
                print('Loading shifts...')
                shifts = np.load(shifts_fn)
                shift_dists = np.sqrt(np.sum(np.square(shifts), axis=1))
            print('Loading stack...')
            stack = dd.io.load(f)
            print('Computing std...')
            if not args.no_shifts:
                invalid_frames = [
                    i for i in np.arange(np.alen(stack))
                    if shift_dists[i] > args.shift_threshold
                ]
            else:
                invalid_frames = []
            valid_frames = segmentation.valid_frames(invalid_frames,
                                                     length=np.alen(stack))
            std = segmentation.std(stack, valid_frames=valid_frames)
            print('Saving std...')
            io.save(base + STD_DEV_SUFFIX, std, spacing=io.SPACING_JAKOB)
            print('Finding rois...')
            rois = segmentation.find_rois_template(std, template=template)
            print('Saving rois...')
            np.save(base + ROIS_SUFFIX, rois)
            print('Getting traces...')
            traces = segmentation.get_traces(stack, rois, use_radius=5)
            print('Saving traces...')
            np.save(base + TRACES_SUFFIX, traces)
        except Exception as e:
            print('An exception occurred:')
            print(e)
def run_epoch(session, m, students, eval_op, verbose=False):
    pred_prob = []
    actual_labels = []  # used for the whole comparison
    skill_id_origin_list = []
    target_id_origin_list = []
    iteration = int(len(students) / m.batch_size)
    for i_iter in pyprind.prog_percent(range(iteration)):
        # bar.update(m.batch_size)
        x = np.zeros((m.batch_size, m.num_steps, m.seq_width))
        target_id = np.array([], dtype=np.int32)
        skill_id_origin = np.array([], dtype=np.int32)
        target_id_origin = np.array([], dtype=np.int32)
        target_correctness = []  # used for just a batch
        # load data for a batch
        # tuple format:
        # 0: user_id
        # 1: record_numb
        # 2: data
        # 3: Target_Id
        # 4: correctness
        for i_batch in range(m.batch_size):
            student = students[i_iter * m.batch_size + i_batch]
            record_num = student[1]
            # record_content_pd = student[2].reset_index(drop=True)
            record_content = student[2].as_matrix()
            temp_skill_id_list = list(student[2]['skill_id'])
            skill_id = student[3]
            correctness = student[4]
            # construct data for training:
            # data ~ x
            # target_id ~ skill_id
            # target_correctness ~ correctness
            for i_recordNumb in range(record_num):
                if i_recordNumb < m.num_steps:
                    x[i_batch, i_recordNumb, :] = record_content[i_recordNumb, :]
                    if skill_id[i_recordNumb] in m.skill_set:
                        temp = i_batch * m.num_steps * m.skill_num + \
                            i_recordNumb * m.skill_num + skill_id[i_recordNumb]
                        temp_i = skill_id[i_recordNumb]
                        temp_s = temp_skill_id_list[i_recordNumb]
                    else:
                        temp = i_batch * m.num_steps + i_recordNumb * m.skill_num + 0
                        temp_i = 0
                        temp_s = temp_skill_id_list[i_recordNumb]
                    target_id = np.append(target_id, [[temp]])
                    target_id_origin = np.append(target_id_origin, [[temp_i]])
                    skill_id_origin = np.append(skill_id_origin, [[temp_s]])
                    target_correctness.append(int(correctness[i_recordNumb]))
                    actual_labels.append(int(correctness[i_recordNumb]))
                else:
                    break
            # test inter_skill and intra_skill
            """
            if record_num <= m.num_steps:
                skill_id_origin = np.append(skill_id_origin, temp_skill_id_list)
            else:
                skill_id_origin = np.append(skill_id_origin,
                                            temp_skill_id_list[:m.num_steps])
            """
        pred, _ = session.run([m.pred, eval_op],
                              feed_dict={
                                  m.inputs: x,
                                  m.target_id: target_id,
                                  m.target_correctness: target_correctness
                              })
        for s in skill_id_origin:
            skill_id_origin_list.append(s)
        for t in target_id_origin:
            target_id_origin_list.append(t)
        for p in pred:
            pred_prob.append(p)
        # print("------------------len ", len(skill_id_origin_list), "\t",
        #       len(target_id_origin_list))
        # print(skill_id_origin_list[:100])
        # print(target_id_origin_list[:100])
    rmse, auc, r2 = get_evaluate_result(actual_labels, pred_prob)
    # print("==> predict_prob shape\t", np.shape(pred_prob), '\tactual_labels\t',
    #       np.shape(actual_labels), '\ttarget_id_list\t',
    #       np.shape(target_id_origin_list))
    # print(target_id_origin_list[1:100])
    intra_skill_actual = []
    intra_skill_pred = []
    inter_skill_actual = []
    inter_skill_pred = []
    for idx in np.arange(len(target_id_origin_list)):
        if skill_id_origin_list[idx] == target_id_origin_list[idx]:
            intra_skill_actual.append(actual_labels[idx])
            intra_skill_pred.append(pred_prob[idx])
        else:
            inter_skill_actual.append(actual_labels[idx])
            inter_skill_pred.append(pred_prob[idx])
    inter_rmse, inter_auc, inter_r2 = get_evaluate_result(inter_skill_actual,
                                                          inter_skill_pred)
    intra_rmse, intra_auc, intra_r2 = get_evaluate_result(intra_skill_actual,
                                                          intra_skill_pred)
    return rmse, auc, r2, inter_rmse, inter_auc, inter_r2, intra_rmse, intra_auc, intra_r2
def time_add_level_process(data):
    data = data.reset_index(drop=True)
    bins = np.arange(min(data['time']), max(data['time']),
                     code0.DatasetParameter().time_interval * 2)
    correct_mean_list = []
    correct_std_list = []
    correct_num_list = []
    for item_index in pp.prog_percent(range(len(bins)),
                                      stream=sys.stdout,
                                      title='==> get correctness'):
        up_bin = bins[item_index] + code0.DatasetParameter().time_interval
        down_bin = bins[item_index] - code0.DatasetParameter().time_interval
        temp_data = data[data['time'] >= down_bin]
        temp_data = temp_data[temp_data['time'] < up_bin]
        temp_correct_list = list(temp_data['correct'])
        correct_num_list.append(len(temp_correct_list))
        if len(temp_correct_list) != 0:
            correct_mean_list.append(np.mean(temp_correct_list, axis=0))
            correct_std_list.append(np.std(temp_correct_list, axis=0))
        else:
            correct_mean_list.append(0)
            correct_std_list.append(0)
    # plot the relationship
    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
    ax = axs[0]
    ax.plot(bins, correct_mean_list)
    ax.set_title('correctness')
    boundary_list = code0.DatasetParameter().correct_boundary_list
    for nmber in boundary_list:
        ax.axhline(y=nmber, xmin=0, xmax=1, c="red", linewidth=0.5, zorder=0)
    ax = axs[1]
    ax.plot(bins, correct_num_list)
    ax.set_title("time z score distribution")
    ax.set_xlim([-2, 4])
    plt.savefig('./result/assistment2009/time_distribution_correctness_' +
                str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) + '.png')
    # plt.show()
    # add a column according to correctness boundary
    time_level_list = []
    temp_list = list(data['time'])
    bd = code0.DatasetParameter().time_boundary_list
    # 0 ~ time < -0.8
    # 1 ~ -0.8 < time < -0.6
    # 2 ~ -0.6 < time < 0
    # 3 ~ 0 < time
    for idx in range(len(temp_list)):
        if temp_list[idx] <= bd[0]:
            time_level_list.append(0)
        elif bd[0] < temp_list[idx] <= bd[1]:
            time_level_list.append(1)
        elif bd[1] < temp_list[idx] <= bd[2]:
            time_level_list.append(2)
        elif temp_list[idx] > bd[2]:
            time_level_list.append(3)
        else:
            raise Exception("Error in time division")
    data['time_level'] = time_level_list
    return data