def test(root, outputdir, invis='fusion20', inir='fusion', predict='vis', model=None):
    """Run a saved fusion model over paired VIS/IR image directories.

    For each sorted (VIS, IR) filename pair, the two images are concatenated
    along the channel axis, optionally rolled so the predicted modality comes
    first, truncated to the network's input channels, and the network output is
    written to ``outputdir`` as ``<vis-filename>.tif`` scaled to [0, 255].

    :param root: dataset root; 'fusion' in it is substituted to locate inputs.
    :param outputdir: directory receiving the ``.tif`` predictions.
    :param invis: substitution for 'fusion' to build the VIS input directory.
    :param inir: substitution for 'fusion' to build the IR input directory.
    :param predict: 'vis' (default) or anything else to predict the IR side.
    :param model: path to a ``torch.save``-d object whose first element is the net.

    BUG FIX: the original referenced an undefined name ``device`` and called
    ``net.cuda()`` unconditionally; the device is now derived from CUDA
    availability, so the function also runs on CPU-only machines.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    net = torch.load(model)[0]
    net.to(device)
    dirvis = root.replace('fusion', invis) + '/VIS'
    dirir = root.replace('fusion', inir) + '/IR'
    from progressbar import progressbar as pb
    for vis, ir in pb(zip(sorted(os.listdir(dirvis)), sorted(os.listdir(dirir)))):
        f = vis
        vis = f'{dirvis}/{vis}'
        ir = f'{dirir}/{ir}'
        imgvis = data.read(vis)
        imgir = data.read(ir)
        # `inp` instead of `input` — avoid shadowing the builtin
        inp = torch.cat([imgvis, imgir], dim=0)
        if predict != 'vis':
            # put the modality being predicted first in the channel stack
            inp = inp.roll(shifts=1, dims=0)
        inp = inp[:net.inchannels, ...]
        inp = inp.unsqueeze(0).to(device)
        with torch.no_grad():
            out = net(inp)
        write_tensor(f'{outputdir}/{f}.tif', out * 255)
def generate_bar(ln, text, redirect_stdout=False):
    """Create a progress bar counting 0..*ln*, prefixed with the label *text*.

    *redirect_stdout* is forwarded so prints interleave cleanly with the bar.
    """
    widgets = [FormatLabel(text)]
    widgets.extend(base_widgets)
    return pb(min_value=0,
              max_value=ln,
              widgets=widgets,
              redirect_stdout=redirect_stdout)
# Pairs embryo slices between every pair of embryos by closest relative slice
# position, then records cross-embryo per-column correlations, bucketed into
# same-genotype vs different-genotype dicts (returned as [diff, same]).
# NOTE(review): duplicate of the other get_corrs in this file — keep in sync.
# NOTE(review): relies on pandas `.ix` and `.select`, both removed in modern
# pandas (>=1.0); migrate to `.loc`/boolean indexing before running today.
def get_corrs(data, adjust=identity, corr_func='pearson'): max_slice = defaultdict(int) for sl in data.columns: sl = sl.split('_sl') emb = sl[0] max_slice[emb] = max(max_slice[emb], int(sl[1][0:2])) xs = pd.Series(index=data.columns, data=[int(a.split('_sl')[1][:2])/max_slice[a.split('_sl')[0]] for a in data.columns if 'sl' in a]) corrs_same = defaultdict(list) corrs_diff = defaultdict(list) all_corrs = [corrs_diff, corrs_same] for emb1_name in pb()(max_slice): emb1 = data.select(**sel_startswith(emb1_name)).applymap(adjust) genotype = emb1_name.split('_')[0] xs1 = xs.select(startswith(emb1_name)) for emb2_name in max_slice: if emb1_name == emb2_name: continue emb2 = data.select(**sel_startswith(emb2_name)).applymap(adjust) xs2 = xs.select(startswith(emb2_name)) closest = { column: min((abs(x2 - x1), c2) for c2, x2 in xs2.items())[1] for column, x1 in xs1.items() } for col in emb1.columns: same = genotype == emb2_name.split('_')[0] all_corrs[same][genotype].append(emb1.ix[:, col].corr( emb2.ix[:, closest[col]], corr_func, )) return all_corrs
def get_subspace_pics(self):
    """Project every picture onto the truncated SVD subspace, per RGB channel.

    The subspace dimension is the minimum matrix rank across the three
    centered channel matrices. The result, shaped (dim, num_pics, 3), is
    cached on ``self.subspace_pics`` and returned on subsequent calls.
    """
    if self.subspace_pics is not None:
        return self.subspace_pics

    # Fall back to the computing getters only when the caches are cold.
    pics = self.reshaped_pics if self.reshaped_pics is not None \
        else self.get_reshaped_pics()
    mean_pic = self.mean_pic if self.mean_pic is not None \
        else self.get_mean_pic()
    centered = self.centered_pics if self.centered_pics is not None \
        else self.get_centered_pics()

    dim = np.min((np.linalg.matrix_rank(centered[:, :, 0]),
                  np.linalg.matrix_rank(centered[:, :, 1]),
                  np.linalg.matrix_rank(centered[:, :, 2])))
    svds = self.svd if self.svd is not None else self.compute_svd()

    result = np.zeros((dim, self.num_pics, 3))
    for idx in pb(range(self.num_pics)):
        for channel, svd in enumerate(svds):
            basis = svd[0][:, :dim]
            result[:, idx, channel] = basis.T @ (pics[:, idx, channel]
                                                - mean_pic[:, channel])
    self.subspace_pics = result
    return result
def gen_statistics(match_method, iters=50, debug=False):
    """Estimate false-accept / false-reject rates of a fingerprint matcher.

    Repeatedly picks a random ``test/f*.png`` file, pairs it with a
    ``test/s*.png`` file whose numeric id differs by -1..1, and runs
    *match_method* on the pair. Pairs with equal ids are genuine matches;
    others are impostor attempts.

    :param match_method: callable(path1, path2) -> bool-like match decision.
    :param iters: number of random trials.
    :param debug: when True, print the collected raw decisions.
    :return: tuple (false_accept_rate, false_reject_rate).

    FIXES: rates no longer raise ZeroDivisionError when a bucket is empty
    (0.0 is reported instead), and the previously dead ``debug`` flag works.
    """
    fnames = glob("test/f*.png")
    match = []
    mismatch = []
    for _ in pb(range(iters)):
        f1_test = random.choice(fnames)
        f1_num = int(f1_test[6:10])  # numeric id embedded in the filename
        f2_num = f1_num + random.randint(-1, 1)
        f2_test_glob = "test/s" + str(f2_num).zfill(4) + "*.png"
        try:
            f2_test = glob(f2_test_glob)[0]
            match_val = match_method(f1_test, f2_test)
        except Exception:
            # no counterpart file, or the matcher failed — skip this trial
            continue
        if f1_num == f2_num:
            match.append(match_val)
        else:
            mismatch.append(match_val)
    if debug:
        print(f"Matches: {match}")
        print(f"Mismatches: {mismatch}")
    false_reject = [a for a in match if a is False]
    false_accept = [a for a in mismatch if a is True]
    false_reject_rate = len(false_reject) / len(match) if match else 0.0
    false_accept_rate = len(false_accept) / len(mismatch) if mismatch else 0.0
    print(f"False acceptance rate: {false_accept_rate}")
    print(f"False reject rate: {false_reject_rate}")
    return (false_accept_rate, false_reject_rate)
def execute(function: cocoex.Problem, algorithm: Callable, population_size: int = 100, generations: int = 100, initialization: Callable = np.random.uniform, show_progress=False) -> Tuple[np.array, np.array]:
    """
    Run *algorithm* on *function* for a number of generations.

    :param function: Function on which to run.
    :param algorithm: Algorithm to execute.
    :param population_size: Population size.
    :param generations: How many generations to execute.
    :param initialization: Initialization of the first population.
    :param show_progress: Whether to show progress.
    :return: Tuple of population in shape
        (generations, population_size, function.dimension) and evaluations in
        shape (generations, population_size).
    """
    steps = range(generations - 1)
    if show_progress:
        steps = pb(steps)
    population = initialization(function.lower_bounds,
                                function.upper_bounds,
                                [population_size, function.dimension])
    populations, evaluations = [], []
    for _ in steps:
        # snapshot as a plain ndarray, whatever the algorithm returned
        if isinstance(population, np.ndarray):
            snapshot = population.copy()
        else:
            snapshot = population.numpy().copy()
        populations.append(snapshot)
        evaluations.append(evaluate(function, snapshot))
        population = algorithm(population, evaluations[-1], function)
    return np.stack(populations, axis=0), np.stack(evaluations, axis=0)
def get_background_snprate(tsss, dnase, snps, exons):
    """Compute the background SNP rate over all TSS-associated DNase regions.

    Iterates the TSS table index; a single row (Series) is counted directly,
    while a duplicated index entry (DataFrame) accumulates SNP and length
    totals across its rows before recording one pooled rate for that index.

    :return: (overall snp rate, total snps, total region length, per-TSS rate dict)

    BUG FIX: the inner ``for i, tss in tss.iterrows()`` loop shadowed both the
    outer index variable ``i`` and the frame ``tss``; ``snprate_dict`` is now
    unambiguously keyed by the outer TSS index.

    NOTE(review): ``tsss.ix[...]`` uses the pandas ``.ix`` indexer, removed in
    pandas >= 1.0 — migrate to ``.loc`` to run on a current install.
    """
    len_regions = 0
    num_snps = 0
    snprate_dict = {}
    for i in pb()(tsss.index):
        tss = tsss.ix[i]
        if isinstance(tss, pd.Series):
            n, l = get_snps_and_len(tss, dnase, snps, set(), exons)
            len_regions += l
            num_snps += n
            if l:
                snprate_dict[i] = n / l
        elif isinstance(tss, pd.DataFrame):
            # duplicated index: pool counts over all rows, share one seen-set
            seen_dnase2 = set()
            ns = 0
            ls = 0
            for _, row in tss.iterrows():
                n, l = get_snps_and_len(row, dnase, snps, seen_dnase2, exons)
                len_regions += l
                ls += l
                num_snps += n
                ns += n
            if ls:
                snprate_dict[i] = ns / ls
    return num_snps / len_regions, num_snps, len_regions, snprate_dict
def fstpso_multiple(function: cocoex.Problem, repeats=10, population_size: int = 100, generations: int = 100, show_progress=False) -> Tuple[np.array, np.array]:
    """
    Run the FST-PSO algorithm multiple times.

    :param function: Function to optimize.
    :param repeats: How many times to repeat the execution.
    :param population_size: Number of particles in the swarm.
    :param generations: How many iterations to perform.
    :param show_progress: Whether to show progress (verbose logging).
    :return: Tuple of population in shape
        (repeats, generations, population_size, function.dimension) and
        evaluations in shape (repeats, generations, population_size).
    """
    runs = range(repeats)
    if show_progress:
        runs = pb(runs)
    all_populations = []
    all_evaluations = []
    for _ in runs:
        pop, evals = fstpso(function, population_size, generations, False)
        all_populations.append(pop)
        all_evaluations.append(evals)
    return np.stack(all_populations), np.stack(all_evaluations)
# Pairs embryo slices between every pair of embryos by closest relative slice
# position, then records cross-embryo per-column correlations, bucketed into
# same-genotype vs different-genotype dicts (returned as [diff, same]).
# NOTE(review): duplicate of the other get_corrs in this file — keep in sync.
# NOTE(review): relies on pandas `.ix` and `.select`, both removed in modern
# pandas (>=1.0); migrate to `.loc`/boolean indexing before running today.
def get_corrs(data, adjust=identity, corr_func='pearson'): max_slice = defaultdict(int) for sl in data.columns: sl = sl.split('_sl') emb = sl[0] max_slice[emb] = max(max_slice[emb], int(sl[1][0:2])) xs = pd.Series(index=data.columns, data=[ int(a.split('_sl')[1][:2]) / max_slice[a.split('_sl')[0]] for a in data.columns if 'sl' in a ]) corrs_same = defaultdict(list) corrs_diff = defaultdict(list) all_corrs = [corrs_diff, corrs_same] for emb1_name in pb()(max_slice): emb1 = data.select(**sel_startswith(emb1_name)).applymap(adjust) genotype = emb1_name.split('_')[0] xs1 = xs.select(startswith(emb1_name)) for emb2_name in max_slice: if emb1_name == emb2_name: continue emb2 = data.select(**sel_startswith(emb2_name)).applymap(adjust) xs2 = xs.select(startswith(emb2_name)) closest = { column: min((abs(x2 - x1), c2) for c2, x2 in xs2.items())[1] for column, x1 in xs1.items() } for col in emb1.columns: same = genotype == emb2_name.split('_')[0] all_corrs[same][genotype].append(emb1.ix[:, col].corr( emb2.ix[:, closest[col]], corr_func, )) return all_corrs
def execute_multiple(function: cocoex.Problem, algorithm: Callable, repeats: int = 10, population_size: int = 100, generations: int = 100, initialization: Callable = np.random.uniform, show_progress=False) -> Tuple[np.array, np.array]:
    """
    Run *algorithm* on *function* several times and stack the results.

    :param function: Function on which to run.
    :param algorithm: Algorithm to execute.
    :param repeats: How many times to repeat the execution.
    :param population_size: Population size.
    :param generations: How many generations to execute.
    :param initialization: Initialization of the first population.
    :param show_progress: Whether to show progress.
    :return: Tuple of population in shape
        (repeats, generations, population_size, function.dimension) and
        evaluations in shape (repeats, generations, population_size).
    """
    runs = range(repeats)
    if show_progress:
        runs = pb(runs)
    all_populations = []
    all_evaluations = []
    for _ in runs:
        pop, evals = execute(function, algorithm, population_size,
                             generations, initialization, False)
        all_populations.append(pop)
        all_evaluations.append(evals)
    return np.stack(all_populations), np.stack(all_evaluations)
def calc_supports_on_binaries(self, threshold=None, get_weights=False):
    """Compute per-test-row support fractions among positive / negative train rows.

    For each row of the evaluation split, and each training row it shares
    active (one-hot == 1) features with, record the fraction of positive
    (resp. negative) training rows that contain that whole shared feature set.

    :param threshold: optional minimum support to keep; BUG FIX — this
        parameter was previously ignored in favour of ``self.threshold``; an
        explicit argument now takes precedence (default behaviour unchanged).
    :param get_weights: when True, evaluate supports on the training split
        itself instead of the held-out test split.
    :return: tuple ``(support_pos, support_neg)``; each element is a per-row
        list (or filtered ndarray, when a threshold applies) of supports.
    """
    effective_threshold = self.threshold if threshold is None else threshold
    binarized_df = self.onehot_numerical()
    X_train, X_test = train_test_split(binarized_df, test_size=self.test_size,
                                       random_state=self.seed)
    self.train_df = X_train
    self.test_df = X_test
    if get_weights:
        X_test = deepcopy(X_train)
    X_train_np, X_test_np = np.array(X_train), np.array(X_test)
    X_train_pos = np.array(X_train[X_train[self.target] == 1])
    X_train_neg = np.array(X_train[X_train[self.target] == 0])
    support_pos, support_neg = [], []
    for i in pb(range(len(X_test))):
        support_pos_i, support_neg_i = [], []
        # hoisted out of the j-loop: the test row's active features are fixed
        # (last column is the target and is excluded)
        test_ones = np.where(X_test_np[i, :-1] == 1)[0]
        for j in range(len(X_train)):
            train_ones = np.where(X_train_np[j, :-1] == 1)[0]
            both_ones = np.intersect1d(test_ones, train_ones)
            if len(both_ones) > 0:
                # fancy indexing already returns copies — no deepcopy needed
                pos_sums = np.sum(X_train_pos[:, both_ones], axis=1)
                neg_sums = np.sum(X_train_neg[:, both_ones], axis=1)
                # a row "supports" the set iff every shared feature is active
                pos_hits = pos_sums[pos_sums == len(both_ones)]
                neg_hits = neg_sums[neg_sums == len(both_ones)]
                support_pos_i.append(len(pos_hits) / len(X_train_pos))
                support_neg_i.append(len(neg_hits) / len(X_train_neg))
        if effective_threshold:
            support_pos_i = np.array(support_pos_i)
            support_neg_i = np.array(support_neg_i)
            support_pos_i = support_pos_i[support_pos_i > effective_threshold]
            support_neg_i = support_neg_i[support_neg_i > effective_threshold]
        support_pos.append(support_pos_i)
        support_neg.append(support_neg_i)
    return support_pos, support_neg
# Adds string-match summary columns to a Q&A dataframe (tags/title/question/
# answers), with optional in-place preprocessing, stopword removal and
# stemming passes driven by the boolean flags or the `reformat` keyword.
# NOTE(review): `process` and `stem` are deliberately reset to False before
# the per-row strmatch() loop — the text was already transformed in-place
# above, so strmatch() must not re-process it; confirm before changing.
def transform_df(self, df, process=False, stop=False, stem=False, tags=False, reformat='summary', columns=['title', 'question', 'answers']): """Convenience function using dataframe with columns: tags (str), title (str), question (str), answers (str) See strmatch() for additional argument details If reformat = 'full', then text is preprocessed and stemmed in-place. """ if tags or "tags" in reformat or reformat == 'full': df['tags'] = df.tags.progress_apply( lambda x: ' '.join(x.split('|'))) if process or "process" in reformat or reformat == 'full': for col in columns: df.loc[:, col] = df[col].progress_apply(self.preprocess) if stop or "stop" in reformat or reformat == 'full': en_stop = set(stopwords.words('english')) en_stop.update([ 'use', 'like', 'tri', 'get', 'set', 'way', 'may', 'would', 'could', 'might', 'also' ]) for col in columns: df[col] = df[col].progress_apply(lambda x: ' '.join( [i for i in x.split() if not i in en_stop])) if stem or "stem" in reformat or reformat == 'full': for col in columns: df.loc[:, col] = df[col].progress_apply(self.stem) if 'only' in reformat: return df process = False stem = False column = [] for i in pb(df.itertuples()): text = "{} {} {} {}".format(' '.join(i.tags.split('|')), i.title, i.question, i.answers) column.append( self.strmatch(text, process=process, stem=stem, reformat=reformat)) if reformat == 'summary' or reformat == 'full': df['uniq'] = [i[0] for i in column] df['raw'] = [i[1] for i in column] df['words'] = ['|'.join(i[2]) for i in column] df['len'] = [i[3] for i in column] df['ratio'] = [i[4] for i in column] else: df['strmatch'] = column return df
def compute_eps_0(self):
    """Mean distance to each picture's second-nearest neighbour, times 1.5.

    The value is cached on ``self.eps_0`` and returned directly on later calls.
    """
    if self.eps_0 is not None:
        return self.eps_0
    reshaped = self.get_reshaped_pics()
    runner_up = []
    for idx in pb(range(self.num_pics)):
        # sort (other, distance) pairs by distance; [1] is the second-nearest
        ranked = sorted(self.calc_distances(reshaped[:, idx, :]),
                        key=lambda pair: pair[1])
        runner_up.append(ranked[1][1])
    self.eps_0 = np.mean(runner_up) * 1.5
    return self.eps_0
def psnr(root, outputdir, predict='vis'):
    """Write a ``.psnr`` sidecar file for every prediction in *outputdir*.

    Each prediction is compared (MSE -> PSNR) against the ground-truth image
    with the same sorted position under ``root/VIS`` or ``root/IR``.

    :param root: dataset root containing the VIS/ and IR/ ground-truth dirs.
    :param outputdir: directory of predicted images.
    :param predict: 'ir' compares against IR ground truth, anything else VIS.

    BUG FIX: the sidecar file handle was previously leaked
    (``open(...).write(...)``); it is now closed via a ``with`` block.
    """
    criterion = nn.MSELoss()
    d = 'IR' if predict == 'ir' else 'VIS'
    gtdir = root + '/' + d + '/'
    # ignore previously written .psnr sidecars when pairing files
    filt = lambda l: sorted(filter(lambda x: not x.endswith('.psnr'), l))
    from progressbar import progressbar as pb
    for gt, pred in pb(zip(filt(os.listdir(gtdir)), filt(os.listdir(outputdir)))):
        pred = f'{outputdir}/{pred}'
        groundtruth = data.read(f'{gtdir}/{gt}')
        predicted = data.read(pred)
        mse = criterion(groundtruth, predicted).item()
        psnr_val = mse_to_psnr(mse)  # renamed: don't shadow the function name
        with open(pred + '.psnr', 'w') as fh:
            fh.write(f'{psnr_val:.3f}')
# Script chunk (starts mid-expression): Selenium automation that logs in to
# data.stackexchange.com and submits batched SQL queries, collecting the
# result-download links.
# NOTE(review): the CSS selectors "button[id='submit-query" and
# "a[id='resultSetsButton" are missing their closing "']" — verify against the
# live page markup before trusting these waits.
EC.element_to_be_clickable( (By.XPATH, "//span[contains(text(), 'Next')]"))).click() driver.get("https://data.stackexchange.com/account/login") driver.find_element_by_css_selector( "div[class='preferred-login']").find_element_by_css_selector( 'span').click() #%%####################################################################### # Basic Data # ########################################################################## ## Read queries q = 'select Id as id, CreationDate as creationdate from Posts where Id IN ({})' ## Get SO (change sql when necessary) for count, batch in pb(enumerate(so_ids)): driver.get("https://data.stackexchange.com/stackoverflow/query/new") query = q.format(','.join(batch)) jscript = "document.getElementsByClassName('CodeMirror')[0].CodeMirror.setValue('{}')".format( query) driver.execute_script(jscript) ## Submit Query and download results button = wait.until( EC.element_to_be_clickable( (By.CSS_SELECTOR, "button[id='submit-query"))) button.click() results = wait.until( EC.element_to_be_clickable( (By.CSS_SELECTOR, "a[id='resultSetsButton"))) results_link = results.get_attribute('href')
# Python 2 entry point for the traffic-sign detection pipeline: parses CLI
# options, builds the inter-thread queues, initialises the tracker, FRCNN
# detector and CNN classifier, then iterates the selected test videos,
# writing per-video detection files and optionally visualising results.
# NOTE(review): this code is Python 2 only (print statements, `Queue` module,
# `generator.next()`, integer division in slice bounds). Several string
# literals below span physical line breaks exactly as found in the original
# source; the text is reproduced byte-for-byte.
def main(): try: parser = OptionParser() parser.add_option("-p", "--vid_path", dest="vid_fldr", help="Path to Videos to test.",default='../../../../data/Videos/All/') parser.add_option("--fp", "--frame_path", dest="frame_fldr", help="Path to Frames to test",default='../../../../Frames/Real/'), parser.add_option("-f", "--frame_gen", dest="frame_gen",help="Frame Generator Selection; if flag included, will generate frames from extracted frames", action= 'store_false', default=True) parser.add_option("--lp", "--label_path", dest="label_path", help="Path to GT labels (for visualization)", default='./labels/'), parser.add_option("-r", "--results", dest="generateCSV",help="Flag. Produces detection text files", action= 'store_false', default=True) parser.add_option("-v", "--visualize", dest="visualize", help="Flag. Shows preview of video and detections",action= 'store_true', default=False) parser.add_option("--vb", "--verbose", dest="verbose", help="Flag. Prints outputs from FRCNN,Tracker and Classifier",action= 'store_true', default=False) parser.add_option("-n", "--vid", dest="vidname", help=" Name of videos to test (if testing specific videos)" ,default=None) parser.add_option("--scaleBoxes", dest="scaleBoxes", help=" float : 0.0 -> 1.0 \n Amount boxes are scaled after detection ", default=1.0) parser.add_option("--showROI", dest="showROI", help="Bool. 
Preview detected regions in seperate window", action= 'store_true', default=False) parser.add_option("--part", dest="test_part", help="Generate results for part of test dataset\n --part <integer 1-6>" ,default=None) parser.add_option("--continue", dest="continue_last", help="Continues result generation from video last processed; used in case code terminates prematurely",action= 'store_true' ,default=False) (options, args) = parser.parse_args() # path to video/frames vid_fldr = options.vid_fldr frame_fldr = options.frame_fldr if options.frame_gen and not os.path.exists(vid_fldr): raise Exception("Error: video directory does not exist") elif not options.frame_gen and not os.path.exists(frame_fldr): raise Exception("Error: frame directory does not exist") # path to labels (for visualization) gtlabels = options.label_path # flags verbose = options.verbose visualize = options.visualize generateCSV = options.generateCSV # maximum videos frames to hold in queue max_vid_que = 1 # Queues (will hold 1200 frames ~ 4 videos at max. 
# Will start filling again once consumed by model) imgQ = Queue(maxsize = 300*max_vid_que) # Queue for Images to be used by Sign Classifier imgVisualizeQ = Queue(maxsize = 300*max_vid_que) # Queue for Images to be used by visualization code frcnnQL = Queue(maxsize = 300*max_vid_que) # Queue for Images to be used by FRCNN (bounding box detector), left half of image frcnnQR = Queue(maxsize = 300*max_vid_que) # Queue for Images to be used by FRCNN (bounding box detector), right half of image LKTrackQ = Queue(maxsize = 300*max_vid_que) # Queue for Images to be used by box tracker system (Lucas Kanade/Optical Flow) cnnQ = Queue(maxsize = 300*max_vid_que) # Queue for bounding boxes to be used by Sign Classifier (will get co-ordinates and crop accodingly) bboxQ = Queue() # Queue for bounding boxes to be used by tracker system trackerOutQ = Queue() # Queue for bounding boxes output from tracker system (used for visualization) cnnOutQ = Queue() # Queue for classes output from sign Classifier (used for feedback in tracker system) chTrackQ = Queue() # Queue for keeping track of challenge types (used to dyanmically change classifier model weights) # Initializing settings for neural networks frcnnSettings = optionsFRCNN() cnnSettings = optionsCNN() print "\nInitializing Tracker... ", tracker = hybridTracker() print "Complete." print "\nInitializing FRCNN ... ", C,frcnnRPN,frcnnClass,frcnnClassOnly = setupFRCNN(frcnnSettings) print "Complete." print "\nBuilding Neural Networks ...", cnn = createModel(cnnSettings) loadWeights(cnn,cnnSettings.model_weights_default) print "Complete." 
test_vid_rea = [4,5,6,7,8,18,19,21,24,26,31,38,39,41,47] # test video sequences (real videos) test_vid_syn = [2,4,6,9,12,13,16,17,18,20,22,28,31,32,36] # test video sequences (synthesized videos) # When we generated the result, we split the test videos into 6 parts and ran it on 6 PCs if options.test_part is not None: if int(options.test_part) == 1: test_vid_rea = test_vid_rea[:5] # first 5 real video sequences test_vid_syn = [] elif int(options.test_part) == 2: test_vid_rea = test_vid_rea[5:10] # second 5 real video sequences test_vid_syn = [] elif int(options.test_part) == 3: test_vid_rea = test_vid_rea[10:] # third 5 real video sequences test_vid_syn = [] elif int(options.test_part) == 4: test_vid_rea = [] test_vid_syn = test_vid_syn[:5] # first 5 synthesized video sequences elif int(options.test_part) == 5: test_vid_rea = [] test_vid_syn = test_vid_syn[5:10] # second 5 synthesized video sequences elif int(options.test_part) == 6: test_vid_rea = [] test_vid_syn = test_vid_syn[10:] # third 5 synthesized video sequences # if testing individual videos, command line argument used if options.vidname is not None: vidname = options.vidname.split(",") # else all videos in test dataset processed else: vidname = [] vidname.extend( ["01_%02d_00_00_00" %(seq) for seq in test_vid_rea] ) # Real No Challenge vidname.extend( ["02_%02d_00_00_00" %(seq) for seq in test_vid_syn] ) # Syn No Challenge vidname.extend( ["01_%02d_01_%02d_%02d" %(seq,eff,lvl) for seq in test_vid_rea for eff in range(1,13) for lvl in range(1,6)] ) # Real Challenge vidname.extend( ["02_%02d_01_%02d_%02d" %(seq,eff,lvl) for seq in test_vid_syn for eff in range(1,12) for lvl in range(1,6)] ) # Syn Challenge print "\nGenerating Detections for all test videos (%d videos):" % len(vidname) print "-----------------------------------------------------------" # if --continue flag given, we skip videos for which detection file already generated if options.continue_last: adjusted_vidname = [] print "\n" for vid in 
vidname: detection_file = './detections/' + vid + '.txt' if os.path.exists(detection_file): print "Skipping %s: Detection file already exists" %vid continue else: adjusted_vidname.append(vid) vidname = adjusted_vidname # frameGenerator 1 gets frames from video, 2 from extracted frames if options.frame_gen: frames = frameGenerator1(vidname,vid_fldr,classify=True) else: frames = frameGenerator2(vidname,frame_fldr,classify=True) # need to iterate generator once before handing over to another thread # this is because of peculiar way keras's model.predict() behaves when multi-threading overlap = 70 ch_type,img = frames.next() imgQ.put(img) imgVisualizeQ.put(img) frcnnQL.put(img[0:660,0:814+overlap/2]) frcnnQR.put(img[0:660,814-overlap/2:1627]) LKTrackQ.put([ch_type,cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)]) # thread to feed get frames,preprocess and feed queues t1 = threading.Thread(target=populateQ,args=(frames,imgQ,imgVisualizeQ,frcnnQL,frcnnQR,LKTrackQ,chTrackQ,visualize,)) t1.setDaemon(True) t1.start() # thread for tracker module t2 = threading.Thread(target=tracker.track,args=(LKTrackQ,cnnOutQ,bboxQ,cnnQ,trackerOutQ,)) t2.setDaemon(True) t2.start() # Setting up generator functions Right = findBBox(frcnnQR,'R',C,frcnnSettings,frcnnRPN,frcnnClassOnly) # bounding box detector (right side of frames) Left = findBBox(frcnnQL,'L',C,frcnnSettings,frcnnRPN,frcnnClassOnly) # bounding box decetor (left side of frames) Classify = classifySign(cnn,cnnQ,cnnOutQ,imgQ,cnnSettings,class_map=None,scale=float(options.scaleBoxes),showROI=options.showROI) # sign classifier for vid in vidname: try: print "\n\n---------------------------------" print "Processing:\t %s" % vid print "---------------------------------\n" if not verbose: bar = pb(max_value = 300) # progress bar # loading labels to plot ground truth if visualize: try: labels = np.genfromtxt(gtlabels + vid[:5] +'.txt',delimiter='_') fnum = labels[:,0] gtbox = np.hstack((labels[:,2:4],labels[:,8:10])) except: print "\nError 
Loading Labels: please check label folder directory" print "Continuing without visualization\n" visualize = False pass # creating files to store detections if generateCSV: if not os.path.exists('./detections/'): os.makedirs('./detections/') detection_file = './detections/' + vid + '.txt' f = open( detection_file,'w') f.write("frameNumber_signType_llx_lly_lrx_lry_ulx_uly_urx_ury\n") # wait for queue to populate while frcnnQR.empty(): continue idx = 0 # frame index # keeping track of challenge type/enviroment conditions prev_chType = 'NoCh' while idx<300: idx +=1 # checking challenge type every 7 frames and changing classifier model weights for particular challenget type if idx % 7 == 0: curr_chType = chTrackQ.get() if prev_chType != curr_chType: loadWeights(cnn,cnnSettings.model_weights_effects[curr_chType]) if verbose: print "\n---------------------------------------" print "Changing Classifier Model to : %s " % curr_chType print "---------------------------------------\n" prev_chType = curr_chType # bbox array will contain bounding boxes detected by FRCNN bbox = [] # combining co-ordinates of boxes from left and right side of image bbox.extend(Left.next()) bbox.extend(Right.next()) # passsing boxex to tracker module through queue bboxQ.put(bbox) # classifier gets boxes from tracker module through another queue and returns box classes y = Classify.next() # boxes forwared to classifier by tracker (used for visualization and writing to file) bboxT = trackerOutQ.get().tolist() trackerOutQ.task_done() if verbose: print "Frame %03d: BBoxes :\t"%idx, print bboxT print " Class :\t", print y print"\n" if visualize: img = imgVisualizeQ.get() overlay = img.copy() cv2.rectangle(overlay,(5,5),(330,70),(10,10,10),-1) cv2.putText(overlay,"Frame : %03d"%idx ,org = (10,50),fontFace =cv2.FONT_HERSHEY_SIMPLEX,color=(255,255,20),thickness=4,fontScale=1.5) # plotting Tracker output (Green) for sign,box in enumerate(bboxT): try: box = scaleBoxes(box,float(options.scaleBoxes)) 
cv2.rectangle(overlay,(box[0],box[1]),(box[2],box[3]),(0,255,0),4) if y is not []: cv2.rectangle(overlay,(box[0]-10,box[3]+38),(box[0]+138,box[3]+3),(10,10,10),-1) cv2.putText(overlay,"Class: %d"%(y[sign]),org=(box[0],box[3]+30),fontFace=cv2.FONT_HERSHEY_SIMPLEX,color=(255,255,20),thickness=2,fontScale=0.9) except: continue # plotting GT boxes (Blue) try: for box in np.int16(gtbox[np.where(fnum==idx)]): box = scaleBoxes(box,float(options.scaleBoxes)) cv2.rectangle(overlay,(box[0],box[1]),(box[2],box[3]),(255,0,0),2) except: pass cv2.addWeighted(overlay,0.7,img,0.3,0,img) img = cv2.resize(img,(814,618),interpolation=cv2.INTER_CUBIC) cv2.imshow('img',img) cv2.waitKey(1) imgVisualizeQ.task_done() if generateCSV: formatResults(f,idx,bboxT,y) # writing to file if not verbose: bar.update(idx) # updating progress bar if idx == 300: break if generateCSV: f.close() except StopIteration: if generateCSV: f.close() except Exception as e: print "\nExiting : %s \n" %e if generateCSV: f.close() os.remove(detection_file) # removing last opened file except KeyboardInterrupt: print "\n---------------\nUser Stopped the Program.\n" if generateCSV: f.close() os.remove(detection_file) # removing last opened file
# Calibrates eps_1: loads non-face .jpg images from path_to_nofaces, projects
# each onto the face subspace per RGB channel, and averages the per-channel
# reconstruction errors; eps_1 is the mean over all non-face images (cached).
# NOTE(review): `path_to_nofaces=None` would make os.walk fail — the caller is
# expected to always pass a directory.
def compute_eps_1(self, path_to_nofaces=None): if self.eps_1 is not None: return self.eps_1 path_list = [] for path, subdirs, files in os.walk(path_to_nofaces): for name in files: if name.endswith('.jpg'): path_list.append(os.path.join(path, name)) pics_list = [] for path in path_list: pic = Image.open(path) # resizing could also be done here, by the way pic = pic.resize((self.pic_size, self.pic_size)) pics_list.append(np.array(pic)) pic.close() pics_list = np.array(pics_list) pics_reshaped = np.zeros((self.pic_size**2, len(pics_list), 3)) for i, pic in enumerate(pics_list): for j in range(3): pics_reshaped[:, i, j] = pic[:, :, j].reshape(-1, 1)[:, 0] eps_1_vals = [] projections = self.get_subspace_pics( ) if self.subspace_pics is None else self.subspace_pics r, g, b = self.compute_svd() if self.svd is None else self.svd mean_pic = self.get_mean_pic( ) if self.mean_pic is None else self.mean_pic dim = projections.shape[0] for i in pb(range(len(pics_list))): pic_projection_r = r[0][:, :dim].T @ ( pics_reshaped[:, i, :][:, 0] - mean_pic[:, 0]) pic_projection_g = g[0][:, :dim].T @ ( pics_reshaped[:, i, :][:, 1] - mean_pic[:, 1]) pic_projection_b = b[0][:, :dim].T @ ( pics_reshaped[:, i, :][:, 2] - mean_pic[:, 2]) f_p_r = r[0][:, :dim] @ pic_projection_r f_p_g = g[0][:, :dim] @ pic_projection_g f_p_b = b[0][:, :dim] @ pic_projection_b e_f_r = np.linalg.norm((pics_reshaped[:, i, :][:, 0] - mean_pic[:, 0]) - f_p_r) e_f_g = np.linalg.norm((pics_reshaped[:, i, :][:, 1] - mean_pic[:, 1]) - f_p_g) e_f_b = np.linalg.norm((pics_reshaped[:, i, :][:, 2] - mean_pic[:, 2]) - f_p_b) eps_1_vals.append(np.mean([e_f_r, e_f_g, e_f_b])) eps_1 = np.mean(eps_1_vals) self.eps_1 = eps_1 return eps_1
""" UpstreamCounts.py Script to get the actual number of A's, C's, T's, and G's upstream of genes """ import pandas as pd from Bio import SeqIO from OrderedSeqRec import OrderedSeqRecord from sys import argv from collections import Counter from progressbar import ProgressBar as pb if __name__ == "__main__": coords = pd.read_table(argv[1]) seqs = {rec.id: OrderedSeqRecord(rec) for rec in SeqIO.parse(argv[2], 'fasta')} counts = Counter() for ix in pb()(coords.index): row = coords.ix[ix] counts.update(seqs[row.chrom][row.max_upstream:row.tss]) print(counts)
# Script chunk (starts mid-dict of svg_heatmap keyword defaults): collects
# genes whose logistic/peak r^2 differs strongly between sexes (>.25 diff with
# female r^2 > .4), then renders one SVG heatmap per gene; chunk ends mid-call.
# NOTE(review): near-duplicate of another sex-difference chunk in this file,
# and relies on the removed pandas `.ix` / `.select` APIs.
'max_width': 880, 'progress_bar': False, 'split_columns': True, 'total_width': 200, 'nan_replace' : 0.5, 'vspacer': 0} diffs = set() for gene, diff in (female_logistic_r2 - male_logistic_r2).items(): if female_logistic_r2[gene] > .4 and abs(diff) > .25: diffs.add(gene) for gene, diff in (female_peak_r2 - male_peak_r2).items(): if female_peak_r2[gene] > .4 and abs(diff) > .25: diffs.add(gene) for gene in pb()(diffs): pu.svg_heatmap( ( None, expr_females.ix[[gene]], None, ase_females.ix[gene], None, ase_males.select(**sel_startswith('melXsim')).ix[gene], None, ase_males.select(**sel_startswith('simXmel')).ix[gene], None, expr_males.select(**sel_startswith('melXsim')).ix[[gene]], None, expr_males.select(**sel_startswith('simXmel')).ix[[gene]], ), 'analysis_godot/results/sex_diff/{}.svg'.format(gene), norm_rows_by=( 'female expression', 'max', 'females - L{:.03f} P{:.03f}'.format(female_logistic_r2[gene], female_peak_r2[gene]), 'center0pre',
# Script chunk (begins with the tail of a previous function): defines Phi(M)
# as the metallicity-marginalised luminosity function (phi weighted by the
# metallicity distribution MDF, integrated by trapz), then evaluates, saves
# and plots it over a magnitude grid for branch types 1-3.
return phi_c RES = 1000 x = np.linspace(-3.5, 1.0, RES) def Phi(M, typ): #zs = iso.zs zs = np.linspace(-100.0, 100.0, 1000) # This was where the bug was phis = np.array([phi(M, z, [typ]) for z in zs]) MDFs = np.array([MDF(z) for z in zs]) ys = phis * MDFs I = trapz(ys, zs) return I plt.figure() plt.xlabel("Magnitude") plt.ylabel("Luminosity Function (Arbitrary Units)") for i in pb([1, 2, 3], redirect_stdout=True): # Smoothing and plotting ys = np.array([Phi(M, i) for M in x]) np.save("Results/SALF/xs_t" + str(i), x) np.save("Results/SALF/ys_t" + str(i), ys) plt.plot(x, ys) print("Done Branch ", i) plt.show()
# Script chunk (starts mid-argparse setup): reads the chromosome sets of two
# species' "good" GTFs, forms the cartesian product of chromosome pairs, and
# submits one qsub job per pair by piping a filled-in template to qsub stdin,
# waiting on all jobs at the end.
parser.add_argument('species2', type=str, help='The second species (e.g. "sim")') args = parser.parse_args() script = open('qsub_base.sh').read() species1_gtf = open('Reference/{}_good.gtf'.format(args.species1)) species2_gtf = open('Reference/{}_good.gtf'.format(args.species2)) species1_chroms = {line.split()[0] for line in species1_gtf} species2_chroms = {line.split()[0] for line in species2_gtf} print(species1_chroms, species2_chroms) items = list(itertools.product(species1_chroms, species2_chroms)) jobs = [] for id1, id2 in pb()(items): job = script.format( job_name=id1+'_'+id2, id1 = id1, id2 = id2, species1=args.species1, species2=args.species2) jobs.append(Popen(['qsub'], stdin=PIPE)) jobs[-1].communicate(bytes(job, 'ASCII')) for job in jobs: job.wait()
# Script chunk (starts mid-dict, ends mid-call): duplicate of the other
# sex-difference heatmap loop in this file — keep the two copies in sync.
# Also relies on the removed pandas `.ix` / `.select` APIs.
'progress_bar': False, 'split_columns': True, 'total_width': 200, 'nan_replace': 0.5, 'vspacer': 0 } diffs = set() for gene, diff in (female_logistic_r2 - male_logistic_r2).items(): if female_logistic_r2[gene] > .4 and abs(diff) > .25: diffs.add(gene) for gene, diff in (female_peak_r2 - male_peak_r2).items(): if female_peak_r2[gene] > .4 and abs(diff) > .25: diffs.add(gene) for gene in pb()(diffs): pu.svg_heatmap( ( None, expr_females.ix[[gene]], None, ase_females.ix[gene], None, ase_males.select(**sel_startswith('melXsim')).ix[gene], None, ase_males.select(**sel_startswith('simXmel')).ix[gene], None, expr_males.select(**sel_startswith('melXsim')).ix[[gene]], None, expr_males.select(**sel_startswith('simXmel')).ix[[gene]], ),
# Script chunk: keeps genes expressed above EXPR_MIN in mel, sim AND hybrid
# samples, enumerates within-type embryo pairs plus melXsim/simXmel cross
# pairs, then fills a per-gene earth-mover-distance matrix (initialised -1).
# NOTE(review): pandas `.ix` / `.select` are removed in modern pandas.
mel = expr.select(**ut.sel_startswith('melXmel_')) sim = expr.select(**ut.sel_startswith('simXsim_')) hyb = expr.select(**ut.sel_startswith(('melXsim', 'simXmel'))) expr_in_mel = (mel.max(axis=1) > EXPR_MIN) expr_in_sim = sim.max(axis=1) > EXPR_MIN expr_in_hybrids = (hyb.max(axis=1) > EXPR_MIN) expr_in_all = (expr_in_mel & expr_in_sim & expr_in_hybrids) expr = expr.ix[expr_in_all] embryo_types = {c.split('_sl')[0].split('_rep')[0] for c in expr.columns} embryos = {} for etype in embryo_types: embryos[etype] = { c.split('_sl')[0] for c in expr.columns if c.startswith(etype) } combs = sum([sorted(it.combinations(e, 2)) for e in embryos.values()], []) combs += list( it.product(embryos['melXsim_cyc14C'], embryos['simXmel_cyc14C'])) emds = pd.DataFrame(index=expr.index, columns=["{}-{}".format(*c) for c in combs], data=-1) for gene in pb()(expr.index): for e1, e2 in combs: emds.ix[gene, "{}-{}".format(e1, e2)] = (dd.earth_mover_multi_rep( expr.ix[gene].select(ut.startswith(e1)) + EXPR_MIN, expr.ix[gene].select(ut.startswith(e2)) + EXPR_MIN, ))
def fit(self, train_data, valid_data=None, model_path=None, epochs=1,
        log_file=None, callbacks=None):
    """Train ``self.model`` for ``epochs`` epochs, with optional validation.

    Parameters
    ----------
    train_data : torch.utils.data.DataLoader
        Training batches; batches whose ``x is None`` are skipped.
    valid_data : torch.utils.data.DataLoader, optional
        When given, a no-grad validation pass runs after each epoch.
    model_path : str, optional
        Where to save the whole model each epoch when no ``Checkpoint``
        callback is supplied (Checkpoint then owns persistence).
    epochs : int
        Number of passes over ``train_data``.
    log_file : passed through to ``self.write_log``.
    callbacks : list, optional
        Objects with optional ``on_epoch_begin`` / ``on_epoch_end`` hooks.

    Notes
    -----
    Relies on instance attributes set elsewhere: ``model``, ``optimizer``,
    ``loss``, ``transform``, ``metrics``, ``metrics_name``.
    """
    assert isinstance(train_data, torch.utils.data.DataLoader)
    # BUG FIX: original signature used the mutable default ``callbacks=[]``;
    # passing an explicit list (including []) behaves exactly as before.
    callbacks = [] if callbacks is None else callbacks
    self.write_log(log_file, time.asctime())
    self.init_callbacks(callbacks)
    for epo in range(epochs):
        for cbk in callbacks:
            # TODO: the parameters will be determined later
            if hasattr(cbk, 'on_epoch_begin'):
                cbk.on_epoch_begin()
        self.write_log(log_file, 'Epoch {}/{}'.format(epo + 1, epochs))
        # ------------------------- training pass -------------------------
        self.model.train()
        history = []
        for x, y in pb(train_data):
            if x is None:
                continue
            self.optimizer.zero_grad()
            record = []
            if self.transform:
                # Input transform is data preparation, not part of the graph.
                with torch.no_grad():
                    x = self.transform(x)
            pred = self.model(x)
            loss = self.loss(pred, y)
            loss.backward()
            self.optimizer.step()
            record.append(loss.item())
            # eval metrics on this batch
            for metric in self.metrics:
                record.append(metric(pred, y))
            history.append(record)
        # Column-wise means: [loss, metric1, metric2, ...] over the epoch.
        statics = [np.mean(col) for col in zip(*history)]
        tip = "".join(
            ["train loss is {:.4f}".format(statics[0])]
            + [", {} is {:.4f}".format(name, value)
               for name, value in zip(self.metrics_name, statics[1:])])
        self.write_log(log_file, tip)
        valid_statics = None
        if isinstance(valid_data, torch.utils.data.DataLoader):
            # ------------------------ validation pass ------------------------
            self.model.eval()
            valid_history = []
            with torch.no_grad():
                for x, y in valid_data:
                    if x is None:
                        continue
                    valid_record = []
                    if self.transform:
                        x = self.transform(x)
                    pred = self.model(x)
                    valid_record.append(self.loss(pred, y).item())
                    for metric in self.metrics:
                        valid_record.append(metric(pred, y))
                    valid_history.append(valid_record)
            valid_statics = [np.mean(col) for col in zip(*valid_history)]
            tip = "".join(
                ["valid loss is {:.4f}".format(valid_statics[0])]
                + [", {} is {:.4f}".format(name, value)
                   for name, value in zip(self.metrics_name,
                                          valid_statics[1:])])
            self.write_log(log_file, tip)
        # Save every epoch unless a Checkpoint callback handles saving.
        if not any(isinstance(cbk, Checkpoint) for cbk in callbacks):
            if model_path:
                torch.save(self.model, model_path)
        # BUG FIX: the original referenced ``valid_statics`` unconditionally
        # below, raising NameError whenever ``valid_data`` was None and a
        # LearningRateDecay/Checkpoint callback was supplied.  Fall back to
        # the training statistics in that case.
        cb_statics = valid_statics if valid_statics is not None else statics
        for cbk in callbacks:
            if hasattr(cbk, 'on_epoch_end'):
                if isinstance(cbk, LearningRateDecay):
                    cbk.on_epoch_end(epo, cb_statics[0])
                elif isinstance(cbk, Checkpoint):
                    cbk.on_epoch_end(epo, cb_statics[0], cb_statics[1:],
                                     self.model)
                else:
                    cbk.on_epoch_end()
# NOTE(review): continuation of a loop over GTF records begun above this view;
# `entry` is a split GTF line and `exons`, `stdout`, `peak`, `logist`, `tsss`,
# `dnase`, `snps`, `get_snps_and_len`, and `pb` come from earlier in the file.
    chrom = entry[0]
    # Only exon features contribute to the exon position lookup.
    if entry[2] != 'exon':
        continue
    start = int(entry[3])
    stop = int(entry[4])
    for i in range(start, stop):
        exons[(chrom, i)] = True
        # Progress dot every 10k positions (float modulo against 1e4).
        if i%1e4 == 0:
            print('.', end='')
            stdout.flush()
print("Done loading exons")
snp_rates = {}
len_regions_dict = {}
num_snps_dict = {}
# For each gene appearing in either fit table, total up SNPs and region length
# over its CRM(s).  (pandas .ix is deprecated — would need .loc today.)
for gene in pb()(peak.index.union(logist.index)):
    tss = tsss.ix[gene]
    len_regions = 0
    num_snps = 0
    seen_dnase = set()
    if isinstance(tss, pd.Series):
        #print("Single CRM for gene", gene)
        num_snps, len_regions = get_snps_and_len(tss, dnase, snps, set(),
                                                 exons)
    elif isinstance(tss, pd.DataFrame):
        #print("Multiple CRMs for gene", gene)
        # Multiple TSS rows: accumulate, sharing seen_dnase to avoid
        # double-counting overlapping DNase regions.
        for i, tss in tss.iterrows():
            n, l = get_snps_and_len(tss, dnase, snps, seen_dnase, exons)
            len_regions += l
            num_snps += n
    else:
        # tss should always be a Series (one CRM) or DataFrame (several).
        assert False
def generate_bar(ln, text, redirect_stdout=False):
    """Build a progress bar labelled *text* that counts from 0 to *ln*.

    Parameters
    ----------
    ln : int
        Maximum value of the bar.
    text : str
        Label shown in front of the standard widgets.
    redirect_stdout : bool, optional
        Forwarded to the progressbar constructor so prints inside the loop
        don't mangle the bar.  Added for consistency with the other
        ``generate_bar`` overload in this codebase; default False preserves
        the original behavior.
    """
    return pb(min_value=0, max_value=ln,
              widgets=[FormatLabel(text), ] + base_widgets,
              redirect_stdout=redirect_stdout)
def main():
    """CLI entry point: load a trained RetinaNet, run prediction/evaluation on
    a dataset split, print per-class AP / mAP, and save annotated images.

    Depends on project modules visible elsewhere in the file: ``get_data_df``,
    ``transfers``, ``ColabeledDataset``, ``RetinaNet``, ``NoNormalize``,
    ``draw_rectangle``, ``test``, ``TrueColorList``/``PredColorList``, ``pb``.
    Requires CUDA (``net.cuda()`` below).
    """
    from train import default_rd
    # --------------------------- CLI arguments ---------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'model', help='model 所在的文件夹'
    )
    parser.add_argument(
        '-bs', '--batch_size', default=2, type=int,
        help='batch size,默认是2')
    parser.add_argument(
        '-nj', '--n_jobs', default=6, type=int,
        help='多核并行的核数,默认是6')
    parser.add_argument(
        '-is', '--input_size', default=(960, 600), type=int, nargs=2,
        help=(
            '默认是960, 600(是用于All的detection),因为增加了对于TCT的支持,'
            '这里不再能够使用int和None,必须指定2个'
        )
    )
    parser.add_argument(
        '-rs', '--random_seed', default=1234, type=int,
        help='随机种子数,默认是1234'
    )
    parser.add_argument(
        '-rd', '--root_dir', default=default_rd(),
        help=(
            '数据集所在的根目录,其内部是子文件夹储存图片, 这里是'
            '和system的类型有关,默认是ALL的数据')
    )
    # NOTE(review): store_false means passing --no_normalize sets the flag to
    # False; the attribute default is True.
    parser.add_argument(
        '--no_normalize', action='store_false',
        help='是否对数据进行标准化,如果使用该参数则不进行标准化'
    )
    parser.add_argument(
        '-bb', '--backbone', default='resnet50',
        help='使用的backbone网络,默认是resnet50'
    )
    parser.add_argument(
        '-ps', default=[3, 4, 5, 6, 7], type=int, nargs='+',
        help='使用FPN中的哪些特征图来构建anchors,默认是p3-p7'
    )
    parser.add_argument(
        '-ph', '--phase', default='valid',
        choices=['all', 'train', 'valid', 'test'],
        help='是对全部的数据集做还是对根据seed分出来的test或valid做,默认是valid'
    )
    parser.add_argument(
        '-sd', '--save_dir', default='valid',
        help=(
            "会创建一个文件夹在模型所在的目录中,输出的图像都保存在其中,默认"
            "名称是valid"
        )
    )
    parser.add_argument(
        '-sr', '--save_root', default='./ALLresults',
        help='结果保存的根目录,默认是./ALLresults'
    )
    parser.add_argument(
        '--top_k', default=0, type=int,
        help="使用的是排名第几的模型进行预测,默认是0"
    )
    parser.add_argument(
        '--wh_min', default=None, type=int,
        help="默认是None,用于xml读取,过滤错误的框"
    )
    parser.add_argument(
        '-nt', '--nms_thre', default=0.3, type=float,
        help="进行nms时使用的阈值,默认是0.3"
    )
    parser.add_argument(
        '--custom_label_mapper', action='store_true',
        help="如果使用此参数,将把得到的标签列出,并自己给出起数字标签是什么"
    )
    args = parser.parse_args()
    # --------------------------- read file names ---------------------------
    data_df, label_set = get_data_df(
        args.root_dir, check=False, check_labels=True)
    if args.custom_label_mapper:
        # Interactively ask the user for a numeric id for every label.
        label_mapper = {}
        for l in label_set:
            i = int(input('label--%s的数字标签是:' % l))
            label_mapper[l] = i
    else:
        label_mapper = {l: i for i, l in enumerate(label_set)}
    num_classes = len(set(label_mapper.values()))
    print(label_mapper)
    # Dataset split: same seed/ratios as training so splits line up.
    if args.phase in ['train', 'valid', 'test']:
        trainval_df, test_df = train_test_split(
            data_df, test_size=0.1, shuffle=True,
            random_state=args.random_seed
        )
        if args.phase == 'test':
            use_dat = test_df
        else:
            # 1/9 of the remaining 90% -> 10% of the whole for validation.
            train_df, valid_df = train_test_split(
                trainval_df, test_size=1/9, shuffle=True,
                random_state=args.random_seed
            )
            if args.phase == 'train':
                use_dat = train_df
            else:
                use_dat = valid_df
    else:
        use_dat = data_df
    # --------------------------- build dataset ---------------------------
    img_transfer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    img_transfer = transfers.OnlyImage(img_transfer)
    resize_transfer = transfers.Resize(args.input_size)
    test_transfers = transforms.Compose([
        resize_transfer, img_transfer
    ])
    y_encoder_args = {
        'input_size': args.input_size,
        'ps': args.ps,
        'nms_thre': args.nms_thre}
    xml_parse = {}
    if args.wh_min is not None:
        xml_parse['wh_min'] = args.wh_min
    use_data = ColabeledDataset(
        use_dat, transfer=test_transfers, y_encoder_mode='object',
        label_mapper=label_mapper, xml_parse=xml_parse, **y_encoder_args
    )
    use_dataloader = DataLoader(
        use_data, batch_size=args.batch_size, shuffle=False,
        num_workers=args.n_jobs, collate_fn=use_data.collate_fn)
    # --------------------------- load trained model ---------------------------
    # NOTE(review): backbones other than resnet50/resnet101 would leave
    # `backbone` unbound and raise NameError below.
    if args.backbone == 'resnet50':
        backbone = models.resnet50
    elif args.backbone == 'resnet101':
        backbone = models.resnet101
    net = RetinaNet(backbone=backbone, ps=args.ps, num_class=num_classes)
    bests = torch.load(
        os.path.join(args.save_root, args.model, 'model.pth')
    )
    # model.pth holds a ranked list of checkpoints; pick the top_k-th.
    state_dict = bests[args.top_k]['model_wts']
    net.load_state_dict(state_dict)
    net.eval()
    # --------------------------- predict ---------------------------
    (labels_preds, markers_preds), (APs, mAP_score) = test(
        net.cuda(), use_dataloader, evaluate=True, predict=True,
        device=torch.device('cuda:0'), num_class=num_classes
    )
    for k, v in label_mapper.items():
        print('%s的AP是%.4f' % (k, APs[v]))
    print('mAP是%.4f' % mAP_score)
    # --------------------------- visualization ---------------------------
    # Create the output folder inside the model's directory.
    save_dir = os.path.join(args.save_root, args.model, args.save_dir)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # Inverse of the preprocessing normalization, to recover viewable images.
    to_pil = transforms.ToPILImage()
    no_norm = NoNormalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    # Walk the predictions and draw ground truth + predicted boxes.
    true_color_mapper = {
        i: tuple(TrueColorList[i]) for i in range(num_classes)}
    pred_color_mapper = {
        i: tuple(PredColorList[i]) for i in range(num_classes)}
    for j, ((imgs, labels, markers), labels_pred, markers_pred) in pb(
        enumerate(zip(use_dataloader, labels_preds, markers_preds))
    ):
        imgs = no_norm(imgs)
        for i, (img, label, marker, label_pred, marker_pred) in enumerate(
            zip(imgs, labels, markers, labels_pred, markers_pred)
        ):
            img = to_pil(img)
            # Ground-truth boxes first, in the "true" color palette.
            img = draw_rectangle(
                img, label.tolist(), marker.tolist(),
                color_mapper=true_color_mapper
            )
            if label_pred.numel() == 0:
                # No predictions for this image: save with GT boxes only.
                img.save(
                    os.path.join(
                        save_dir, str(j*args.batch_size+i) + '.png'
                    )
                )
            else:
                # Predicted class = argmax over class scores; annotate the
                # box with its probability rounded to 4 decimals.
                proba, idx_pred = label_pred.max(dim=1)
                img = draw_rectangle(
                    img, idx_pred.cpu().numpy(), marker_pred.cpu().numpy(),
                    color_mapper=pred_color_mapper,
                    fonts=proba.cpu().numpy().round(4)
                )
                img.save(
                    os.path.join(save_dir, str(j*args.batch_size+i) + '.png'))
def run_raytrace(TheSystem, hx_arr, hy_arr, nsur, configurationRange):
    """Batch-raytrace normalized field points through a Zemax OpticStudio
    system for a range of multi-configuration states.

    Parameters
    ----------
    TheSystem : OpticStudio COM system object.
    hx_arr, hy_arr : sequences of normalized field coordinates (same length).
    nsur : surface number up to which rays are traced.
    configurationRange : (first, last) configuration numbers, inclusive.

    Returns a list of DataFrames (one per configuration), filtered to rays
    landing within radius 85 of the image-plane origin.

    NOTE(review): relies on module-level globals not visible here: ``pxpys``
    (pupil sample points), ``constants``, ``CastTo``, ``max_field``, ``pb``.
    """
    dfs = []
    # Initialize x/y image plane arrays
    x_ary = np.empty([len(pxpys) * len(hx_arr)
                      ])  # center field +4 extreme fields
    y_ary = np.empty([len(pxpys) * len(hy_arr)])
    error_code = np.empty([len(pxpys) * len(hy_arr)], dtype=np.int32)
    vignette_code = np.empty([len(pxpys) * len(hy_arr)], dtype=np.int32)
    # Direction cosines of each traced ray.
    l = np.empty([len(pxpys) * len(hy_arr)], dtype=np.float32)
    m = np.empty([len(pxpys) * len(hy_arr)], dtype=np.float32)
    n = np.empty([len(pxpys) * len(hy_arr)], dtype=np.float32)
    # Echo of the pupil/field coordinates in trace order.
    px_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    py_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    hx_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    hy_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    # Adding Rays to Batch, varying normalised object height hy
    assert len(configurationRange) == 2
    # NOTE(review): the per-configuration arrays above are reused/overwritten
    # each iteration; only the filtered DataFrame copy survives in `dfs`.
    for configurationNumber in pb(
            range(configurationRange[0], configurationRange[1] + 1)):
        TheSystem.MCE.SetCurrentConfiguration(configurationNumber)
        TheSystem.SystemData.Fields.SetVignetting()
        raytrace = TheSystem.Tools.OpenBatchRayTrace()
        normUnPolData = raytrace.CreateNormUnpol(
            len(hx_arr) * len(pxpys), constants.RaysType_Real, nsur)
        normUnPolData.ClearData()
        waveNumber = 1
        ray_counter = 0
        # Queue one ray per (pupil point, field point) pair.
        for pxpy in pxpys:
            px, py = pxpy
            for j in range(len(hx_arr)):
                px_output[ray_counter], py_output[ray_counter] = px, py
                hx_output[ray_counter], hy_output[ray_counter] = hx_arr[
                    j], hy_arr[j]
                normUnPolData.AddRay(waveNumber, hx_arr[j], hy_arr[j], px, py,
                                     constants.OPDMode_None)
                ray_counter += 1
        #! [e22s04_py]
        print('running raytrace...')
        baseTool = CastTo(raytrace, 'ISystemTool')
        baseTool.RunAndWaitForCompletion()
        # Drain results in the order the rays were queued.
        normUnPolData.StartReadingResults()
        output = normUnPolData.ReadNextResult()
        j = 0
        while output[0]:  # success
            error_code[j] = output[2]
            vignette_code[j] = output[3]
            x_ary[j] = output[4]  # X
            y_ary[j] = output[5]  # Y
            l[j] = output[7]
            m[j] = output[8]
            n[j] = output[9]
            output = normUnPolData.ReadNextResult()
            j += 1
        # Convert normalized field coordinates to degrees.
        hx_deg = max_field * hx_output
        hy_deg = max_field * hy_output
        package = {
            'hx_deg': hx_deg, 'hy_deg': hy_deg,
            'x_pos': x_ary, 'y_pos': y_ary,
            'px': px_output, 'py': py_output,
            'error_code': error_code, 'vignette_code': vignette_code,
            'l': l, 'm': m, 'n': n
        }
        df = pd.DataFrame(package)
        # end extracting rays
        # Keep only rays landing within radius 85 of the image-plane origin.
        r = np.sqrt(df['x_pos'].values**2 + df['y_pos'].values**2)
        sel = r < 85
        dfs.append(df.iloc[sel])
        baseTool.Close()
    return dfs
# NOTE(review): top-level analysis chunk ending mid-expression — the p-value
# assignment is completed below this view (presumably divided by N_SHUFFLES).
# `good_ase`, `embs`, `ut`, `shuffle_ase`, `N_SHUFFLES`, `bisect`, `Pool`,
# and `pb` come from earlier in the file.
by_emb_sums = pd.DataFrame(index=good_ase.index, columns=sorted(embs))
for emb in embs:
    # Observed per-embryo ASE sum for every gene.
    by_emb_sums.ix[:, emb] = good_ase.T.select(ut.startswith(emb)).sum()
# Null distribution: per-embryo sums from N_SHUFFLES shuffled datasets.
rando_sums = {
    emb: pd.DataFrame(index=good_ase.index, columns=np.arange(N_SHUFFLES),
                      data=np.nan)
    for emb in embs
}
print("Randomizing", file=sys.stderr)
with Pool() as p:
    results = []
    for i in range(N_SHUFFLES):
        results.append(p.apply_async(shuffle_ase, (good_ase, embs)))
    # Collect inside the `with` so the pool isn't terminated before .get().
    for i in pb()(list(range(N_SHUFFLES))):
        result = results[i].get()
        for emb in result:
            rando_sums[emb].ix[:, i] = result[emb]
pvals_by_emb = pd.DataFrame(index=good_ase.index, columns=sorted(embs),
                            dtype=float)
for gene in pb()(pvals_by_emb.index):
    for emb in pvals_by_emb.columns:
        # Rank of the observed sum within the sorted null distribution.
        pvals_by_emb.ix[gene, emb] = (
            (
                bisect(
                    sorted(rando_sums[emb].ix[gene]),
                    by_emb_sums[emb].ix[gene]
                )
            )
""" UpstreamCounts.py Script to get the actual number of A's, C's, T's, and G's upstream of genes """ import pandas as pd from Bio import SeqIO from OrderedSeqRec import OrderedSeqRecord from sys import argv from collections import Counter from progressbar import ProgressBar as pb if __name__ == "__main__": coords = pd.read_table(argv[1]) seqs = { rec.id: OrderedSeqRecord(rec) for rec in SeqIO.parse(argv[2], 'fasta') } counts = Counter() for ix in pb()(coords.index): row = coords.ix[ix] counts.update(seqs[row.chrom][row.max_upstream:row.tss]) print(counts)
def train(root, invis='fusion20', outvis='fusion20', inir='fusion',
          outir='fusion', predict='vis', sigma=0, cropsize=200, inchannels=1,
          outchannels=1, preload=None, saveto=None, batchsize=20, lr=1e-3,
          nbepochs=60):
    """Train a DnCNN denoiser on paired VIS/IR fusion sequences.

    Parameters
    ----------
    root : str
        Dataset root; folders are discovered under ``root/fusion`` and the
        'fusion' component is swapped for the in/out variant names below.
    invis, outvis, inir, outir : str
        Folder-name variants for input/target VIS and IR frames.
    predict : str
        'vis' keeps channel order; anything else rolls VIS/IR so IR leads.
    sigma, cropsize : augmentation noise level and crop size (training only).
    inchannels, outchannels : network input/output channel counts.
    preload : optional state_dict path to resume from.
    saveto : directory for validation images and per-epoch checkpoints.
    batchsize, lr, nbepochs : usual training hyperparameters.

    NOTE(review): relies on module-level names not visible here: ``DnCNN``,
    ``get_folders``, ``dataaugmenter``/``dataaugmenter2``, ``AugmentedData``,
    ``ConcatDataset``, ``TwoFramesDataset``, ``LayerConcatDataset``,
    ``FolderFusionDataset``, ``write_tensor``, and ``device``.  Requires CUDA.
    """
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    net = DnCNN(inchannels=inchannels, outchannels=outchannels)
    net.init_weights()
    if preload:
        net.load_state_dict(torch.load(preload))
    net.cuda()
    folders = get_folders(os.path.join(root, 'fusion'))
    # Training data: all folders except the first, with noise/crop augmentation.
    da = dataaugmenter(sigma, cropsize, inchannels, outchannels)
    ds_train = AugmentedData(
        ConcatDataset(
            TwoFramesDataset(
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', invis) + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', inir) + '/IR'),
                ], roll=0 if predict == 'vis' else 1),
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', outvis) + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', outir) + '/IR'),
                ], roll=0 if predict == 'vis' else 1),
            ) for folder in folders[1:]),
        da)
    # Validation data: first folder only, clean 'fusion' targets, no crops.
    da = dataaugmenter2(inchannels, outchannels)
    ds_val = AugmentedData(
        ConcatDataset(
            TwoFramesDataset(
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', invis) + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', inir) + '/IR'),
                ], roll=0 if predict == 'vis' else 1),
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', 'fusion') + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', 'fusion') + '/IR'),
                ], roll=0 if predict == 'vis' else 1),
            ) for folder in folders[:1]),
        da)
    loader_train = DataLoader(dataset=ds_train, num_workers=2,
                              batch_size=batchsize, shuffle=True)
    loader_val = DataLoader(dataset=ds_val, num_workers=2, batch_size=1,
                            shuffle=False)
    criterion = nn.MSELoss()
    criterion.cuda()
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=1e-5,
                           amsgrad=False, eps=1e-8, betas=(0.9, 0.999))
    # Plateau scheduler stepped once per epoch on the validation loss below.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           verbose=True)

    def do_batch(noisy, target, train=True):
        # One forward (and optionally backward) pass; returns output and loss.
        if train:
            optimizer.zero_grad()
        denoised = net(noisy.to(device))
        loss = criterion(denoised, target.to(device))
        if train:
            loss.backward()
            optimizer.step()
            # if scheduler:
            #     scheduler.step()
        return denoised, loss
    from progressbar import progressbar as pb
    for epoch in range(nbepochs):
        net.train()
        for i, (noisy, target) in pb(enumerate(loader_train)):
            denoised, loss = do_batch(noisy, target)
            # Periodically dump the current batch's first sample for eyeballing.
            if i % 100 == 0:
                write_tensor('noisy.tif', noisy[0:1, ...])
                write_tensor('target.tif', target[0:1, ...])
                write_tensor('denoised.tif', denoised[0:1, ...])
            del loss
            del denoised
        # Validation: first 4 batches only, saved to disk per epoch.
        net.eval()
        with torch.no_grad():
            l = 0
            n = 0
            for i, (noisy, target) in enumerate(loader_val):
                if i > 3:
                    break
                denoised, loss = do_batch(noisy, target, train=False)
                l += loss.item()
                write_tensor(f'{saveto}/val_{epoch}_{i}_noisy.tif',
                             noisy[0:1, ...])
                write_tensor(f'{saveto}/val_{epoch}_{i}_target.tif',
                             target[0:1, ...])
                write_tensor(f'{saveto}/val_{epoch}_{i}_denoised.tif',
                             denoised[0:1, ...])
                n += 1
            print('val:', epoch, l / n)
        # Plateau detection on the summed validation loss.
        scheduler.step(l)
        if saveto:
            torch.save([net, optimizer], f'{saveto}/checkpoint_{epoch}.tar')
# NOTE(review): notebook-style (#%% cells) script chunk that ends mid-loop —
# the per-CVE processing continues below this view.  `pd` (pandas) must be
# imported earlier in the file.
import so_textprocessing as stp
from progressbar import progressbar as pb
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

#%% Path to directory containing 'data' folder, containing NVD json data
path = "/home/david/Documents/misc/FirebaseApp/cvss_prediction/"

#%% Store Data
rows = []

#%% Read All Data
# One NVD JSON feed per year, 2002-2018 inclusive.
for year in pb(range(2002, 2019)):
    CVE = pd.read_json(path + "data/nvdcve-1.1-{}.json".format(year))
    for row in CVE.CVE_Items:
        cve = dict()
        # Get Data
        cve_id = row["cve"]["CVE_data_meta"]["ID"]
        description = row["cve"]["description"]["description_data"]
        impact = row["impact"]
        # Define columns
        cve["description"] = dict()
        cve["impact"] = dict()
        # Assign Information
        cve["description"][cve_id] = description