Ejemplo n.º 1
0
def test(root,
         outputdir,
         invis='fusion20',
         inir='fusion',
         predict='vis',
         model=None):
    """Run a saved fusion network over paired VIS/IR image folders.

    :param root: dataset root path containing the substring 'fusion', which
        is swapped for *invis*/*inir* to locate the two input folders.
    :param outputdir: directory receiving one '<name>.tif' per input pair.
    :param invis: folder-name substitution for the visible-light inputs.
    :param inir: folder-name substitution for the infrared inputs.
    :param predict: 'vis' keeps channel order [VIS, IR]; anything else rolls
        the channels so IR comes first.
    :param model: path to a torch checkpoint; element [0] is the network.
    """
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # NOTE(review): `.cuda()` is called unconditionally, and `device` below
    # is taken from enclosing module scope — confirm both exist at runtime.
    net = torch.load(model)[0]
    net.cuda()

    dirvis = root.replace('fusion', invis) + '/VIS'
    dirir = root.replace('fusion', inir) + '/IR'
    from progressbar import progressbar as pb
    # Pair VIS/IR files by sorted order; assumes both folders correspond 1:1.
    for vis, ir in pb(
            zip(sorted(os.listdir(dirvis)), sorted(os.listdir(dirir)))):
        f = vis
        vis = f'{dirvis}/{vis}'
        ir = f'{dirir}/{ir}'

        imgvis = data.read(vis)
        imgir = data.read(ir)
        # Stack visible and infrared along the channel dimension.
        input = torch.cat([imgvis, imgir], dim=0)

        if predict != 'vis':
            # Swap channel order so the predicted modality comes first.
            input = input.roll(shifts=1, dims=0)

        # Keep only as many channels as the network expects, add batch dim.
        input = input[:net.inchannels, ...]
        input = input.unsqueeze(0).to(device)

        with torch.no_grad():
            out = net(input)

        # Scale [0, 1] output back to 8-bit range before writing.
        write_tensor(f'{outputdir}/{f}.tif', out * 255)
Ejemplo n.º 2
0
def generate_bar(ln, text, redirect_stdout=False):
    """Build a progress bar counting from 0 to *ln*, labelled with *text*.

    :param ln: maximum value of the bar.
    :param text: label rendered in front of the shared base widgets.
    :param redirect_stdout: forwarded to the progress-bar constructor so
        prints interleave cleanly with the bar.
    """
    widgets = [FormatLabel(text)] + base_widgets
    return pb(min_value=0,
              max_value=ln,
              widgets=widgets,
              redirect_stdout=redirect_stdout)
def get_corrs(data, adjust=identity, corr_func='pearson'):
    """Correlate each slice-column of every embryo with the positionally
    nearest slice of every other embryo.

    Column names are assumed shaped '<embryo>_sl<NN>...'; slice positions
    are normalized by each embryo's maximum slice number before matching.

    :param data: DataFrame with one column per embryo slice.
    :param adjust: element-wise transform applied before correlating.
    :param corr_func: correlation method passed to pandas ``Series.corr``.
    :return: [corrs_diff, corrs_same] — dicts genotype -> list of
        correlations, split by whether the two embryos share a genotype.
    """
    # Highest slice number seen per embryo, parsed from '<emb>_sl<NN>'.
    max_slice = defaultdict(int)
    for sl in data.columns:
        sl = sl.split('_sl')
        emb = sl[0]
        max_slice[emb] = max(max_slice[emb], int(sl[1][0:2]))
    # Normalized slice position (slice number / max slice) per column.
    xs = pd.Series(index=data.columns,
                   data=[int(a.split('_sl')[1][:2])/max_slice[a.split('_sl')[0]]
                         for a in data.columns if 'sl' in a])

    corrs_same = defaultdict(list)
    corrs_diff = defaultdict(list)
    # Indexed by the boolean `same` below: [different-genotype, same-genotype].
    all_corrs = [corrs_diff, corrs_same]
    for emb1_name in pb()(max_slice):
        # NOTE(review): relies on deprecated pandas APIs (.select, .ix) —
        # confirm against the pinned pandas version.
        emb1 = data.select(**sel_startswith(emb1_name)).applymap(adjust)
        genotype = emb1_name.split('_')[0]
        xs1 = xs.select(startswith(emb1_name))
        for emb2_name in max_slice:
            if emb1_name == emb2_name: continue
            emb2 = data.select(**sel_startswith(emb2_name)).applymap(adjust)
            xs2 = xs.select(startswith(emb2_name))
            # For each emb1 column, the emb2 column nearest in position.
            closest = {
                column:
                min((abs(x2 - x1), c2)
                    for c2, x2 in xs2.items())[1]
                for column, x1 in xs1.items()
            }
            for col in emb1.columns:
                same = genotype == emb2_name.split('_')[0]
                all_corrs[same][genotype].append(emb1.ix[:, col].corr(
                    emb2.ix[:, closest[col]],
                    corr_func,
                ))
    return all_corrs
Ejemplo n.º 4
0
    def get_subspace_pics(self):
        """Project every picture channel onto its SVD face subspace.

        Returns an array of shape (dim, num_pics, 3) with per-channel
        subspace coefficients; the result is cached on self.subspace_pics.
        """

        # Return the cached projection if it was already computed.
        if self.subspace_pics is not None:
            return self.subspace_pics

        # Lazily materialize the inputs this computation depends on.
        pics_reshaped = self.get_reshaped_pics(
        ) if self.reshaped_pics is None else self.reshaped_pics
        mean_pic = self.get_mean_pic(
        ) if self.mean_pic is None else self.mean_pic
        centered_pics = self.get_centered_pics(
        ) if self.centered_pics is None else self.centered_pics

        # Use the smallest channel rank so all three channels share one
        # subspace dimension.
        dim = np.min((np.linalg.matrix_rank(centered_pics[:, :, 0]),
                      np.linalg.matrix_rank(centered_pics[:, :, 1]),
                      np.linalg.matrix_rank(centered_pics[:, :, 2])))

        svd_r, svd_g, svd_b = self.compute_svd(
        ) if self.svd is None else self.svd

        result = np.zeros((dim, self.num_pics, 3))

        # Project each mean-subtracted picture onto the first `dim` left
        # singular vectors of its channel (svd_*[0] is the U matrix).
        for i in pb(range(self.num_pics)):
            result[:, i, 0] = svd_r[0][:, :dim].T @ (pics_reshaped[:, i, 0] -
                                                     mean_pic[:, 0])
            result[:, i, 1] = svd_g[0][:, :dim].T @ (pics_reshaped[:, i, 1] -
                                                     mean_pic[:, 1])
            result[:, i, 2] = svd_b[0][:, :dim].T @ (pics_reshaped[:, i, 2] -
                                                     mean_pic[:, 2])

        self.subspace_pics = result

        return result
def gen_statistics(match_method, iters=50, debug=False):
    """Estimate false-accept / false-reject rates for *match_method*.

    Repeatedly picks a random 'test/f*.png' probe, pairs it with a
    'test/sNNNN*.png' gallery file whose number is off by at most one,
    and records the boolean match result as a genuine attempt (same
    number) or an impostor attempt (different number).

    :param match_method: callable(f1, f2) returning a True/False decision.
    :param iters: number of random trials to run.
    :param debug: when True, print the raw match/mismatch result lists.
    :return: (false_accept_rate, false_reject_rate) tuple.
    """
    fnames = glob("test/f*.png")
    match = []
    mismatch = []
    for i in pb(range(iters)):
        f1_test = random.choice(fnames)
        # Filename layout assumed: 'test/fNNNN...' -> digits at [6:10].
        f1_num = int(f1_test[6:10])
        f2_num = f1_num + random.randint(-1, 1)
        f2_test_glob = "test/s" + str(f2_num).zfill(4) + "*.png"
        try:
            f2_test = glob(f2_test_glob)[0]
            match_val = match_method(f1_test, f2_test)
        except Exception:
            # No counterpart file, or the matcher failed — skip this trial.
            continue
        if f1_num == f2_num:
            match.append(match_val)
        else:
            mismatch.append(match_val)
    # BUGFIX: the debug flag previously did nothing (prints commented out).
    if debug:
        print(f"Matches: {match}")
        print(f"Mismatches: {mismatch}")
    false_reject = [a for a in match if a is False]
    false_accept = [a for a in mismatch if a is True]
    # Guard empty buckets: with few iters, all trials can land on one side,
    # which previously raised ZeroDivisionError.
    false_reject_rate = len(false_reject) / len(match) if match else 0.0
    false_accept_rate = len(false_accept) / len(mismatch) if mismatch else 0.0
    print(f"False acceptance rate: {false_accept_rate}")
    print(f"False reject rate: {false_reject_rate}")
    return (false_accept_rate, false_reject_rate)
def execute(function: cocoex.Problem,
            algorithm: Callable,
            population_size: int = 100,
            generations: int = 100,
            initialization: Callable = np.random.uniform,
            show_progress=False) -> Tuple[np.array, np.array]:
    """
    Run algorithm on the function.
    :param function: Function on which to run.
    :param algorithm: Algorithm to execute.
    :param population_size: Population size.
    :param generations: How many generations to execute.
    :param initialization: Initialization of the first population.
    :param show_progress: Whether to show progress.
    :return: Tuple of population in shape (generations, population_size, function.dimension) and evaluations in shape (generations, population_size).
    """
    populations = []
    evaluations = []

    # NOTE(review): range(generations - 1) records generations-1 snapshots,
    # so the stacked output's first axis is generations-1, not generations
    # as the docstring states — confirm which is intended.
    counter = range(generations - 1)
    if show_progress:
        counter = pb(counter)

    # Initial population sampled within the problem's box constraints.
    population = initialization(function.lower_bounds, function.upper_bounds,
                                [population_size, function.dimension])
    for gen in counter:
        # Snapshot the population (converting from e.g. a tensor when the
        # algorithm returned a non-numpy type) before evolving it.
        populations.append((population if isinstance(population, np.ndarray)
                            else population.numpy()).copy())
        evaluations.append(evaluate(function, populations[-1]))
        population = algorithm(population, evaluations[-1], function)

    return np.stack(populations, axis=0), np.stack(evaluations, axis=0)
Ejemplo n.º 7
0
def get_background_snprate(tsss, dnase, snps, exons):
    """Compute the background SNP rate over DNase regions near all TSSs.

    :param tsss: TSS table; a duplicated index label yields a DataFrame
        per lookup (multiple TSSs for one gene), otherwise a Series.
    :param dnase: DNase accessibility regions, passed to get_snps_and_len().
    :param snps: SNP table, passed to get_snps_and_len().
    :param exons: exon regions to exclude, passed to get_snps_and_len().
    :return: (overall snp rate, total snps, total length, per-gene rate dict)
    """
    len_regions = 0
    num_snps = 0
    snprate_dict = {}
    for i in pb()(tsss.index):
        tss = tsss.ix[i]
        if isinstance(tss, pd.Series):
            # Single TSS for this gene.
            n, l = get_snps_and_len(tss, dnase, snps, set(), exons)
            len_regions += l
            num_snps += n
            if l:
                snprate_dict[i] = n/l
        elif isinstance(tss, pd.DataFrame):
            # Multiple TSSs: accumulate over rows, sharing the seen-DNase
            # set so overlapping regions are not double counted.
            seen_dnase2 = set()
            ns = 0
            ls = 0
            # BUGFIX: this loop previously rebound `i` (and `tss`), so the
            # snprate_dict entry below was keyed by the last row label
            # instead of the outer gene index.
            for _, row in tss.iterrows():
                n, l = get_snps_and_len(row, dnase, snps, seen_dnase2, exons)
                len_regions += l
                ls += l
                num_snps += n
                ns += n
            if ls:
                snprate_dict[i] = ns/ls
    return num_snps / len_regions, num_snps, len_regions, snprate_dict
def fstpso_multiple(function: cocoex.Problem,
                    repeats=10,
                    population_size: int = 100,
                    generations: int = 100,
                    show_progress=False) -> Tuple[np.array, np.array]:
    """
    Execute several independent FST-PSO runs and stack their histories.
    :param function: Problem to optimize.
    :param repeats: Number of independent runs.
    :param population_size: Swarm size used in every run.
    :param generations: Iterations performed per run.
    :param show_progress: Wrap the repeat loop in a progress bar.
    :return: Tuple of populations shaped (repeats, generations, population_size, function.dimension) and evaluations shaped (repeats, generations, population_size).
    """
    runs = range(repeats)
    if show_progress:
        runs = pb(runs)

    all_populations, all_evaluations = [], []
    for _ in runs:
        run_population, run_evaluation = fstpso(function, population_size,
                                                generations, False)
        all_populations.append(run_population)
        all_evaluations.append(run_evaluation)

    return np.stack(all_populations), np.stack(all_evaluations)
def get_corrs(data, adjust=identity, corr_func='pearson'):
    """Correlate each slice-column of every embryo with the positionally
    nearest slice of every other embryo.

    Column names are assumed shaped '<embryo>_sl<NN>...'; slice positions
    are normalized by each embryo's maximum slice number before matching.

    :param data: DataFrame with one column per embryo slice.
    :param adjust: element-wise transform applied before correlating.
    :param corr_func: correlation method passed to pandas ``Series.corr``.
    :return: [corrs_diff, corrs_same] — dicts genotype -> list of
        correlations, split by whether the two embryos share a genotype.
    """
    # Highest slice number seen per embryo, parsed from '<emb>_sl<NN>'.
    max_slice = defaultdict(int)
    for sl in data.columns:
        sl = sl.split('_sl')
        emb = sl[0]
        max_slice[emb] = max(max_slice[emb], int(sl[1][0:2]))
    # Normalized slice position (slice number / max slice) per column.
    xs = pd.Series(index=data.columns,
                   data=[
                       int(a.split('_sl')[1][:2]) /
                       max_slice[a.split('_sl')[0]] for a in data.columns
                       if 'sl' in a
                   ])

    corrs_same = defaultdict(list)
    corrs_diff = defaultdict(list)
    # Indexed by the boolean `same` below: [different-genotype, same-genotype].
    all_corrs = [corrs_diff, corrs_same]
    for emb1_name in pb()(max_slice):
        # NOTE(review): relies on deprecated pandas APIs (.select, .ix) —
        # confirm against the pinned pandas version.
        emb1 = data.select(**sel_startswith(emb1_name)).applymap(adjust)
        genotype = emb1_name.split('_')[0]
        xs1 = xs.select(startswith(emb1_name))
        for emb2_name in max_slice:
            if emb1_name == emb2_name: continue
            emb2 = data.select(**sel_startswith(emb2_name)).applymap(adjust)
            xs2 = xs.select(startswith(emb2_name))
            # For each emb1 column, the emb2 column nearest in position.
            closest = {
                column: min((abs(x2 - x1), c2) for c2, x2 in xs2.items())[1]
                for column, x1 in xs1.items()
            }
            for col in emb1.columns:
                same = genotype == emb2_name.split('_')[0]
                all_corrs[same][genotype].append(emb1.ix[:, col].corr(
                    emb2.ix[:, closest[col]],
                    corr_func,
                ))
    return all_corrs
def execute_multiple(function: cocoex.Problem,
                     algorithm: Callable,
                     repeats: int = 10,
                     population_size: int = 100,
                     generations: int = 100,
                     initialization: Callable = np.random.uniform,
                     show_progress=False) -> Tuple[np.array, np.array]:
    """
    Repeat execute() several times on the same problem and stack results.
    :param function: Function on which to run.
    :param algorithm: Algorithm to execute.
    :param repeats: How many independent executions to perform.
    :param population_size: Population size per execution.
    :param generations: Generations per execution.
    :param initialization: Initialization of each first population.
    :param show_progress: Wrap the repeat loop in a progress bar.
    :return: Tuple of populations shaped (repeats, generations, population_size, function.dimension) and evaluations shaped (repeats, generations, population_size).
    """
    runs = range(repeats)
    if show_progress:
        runs = pb(runs)

    all_populations, all_evaluations = [], []
    for _ in runs:
        run_population, run_evaluation = execute(function, algorithm,
                                                 population_size, generations,
                                                 initialization, False)
        all_populations.append(run_population)
        all_evaluations.append(run_evaluation)

    return np.stack(all_populations), np.stack(all_evaluations)
Ejemplo n.º 11
0
    def calc_supports_on_binaries(self, threshold=None, get_weights=False):
        """Compute positive/negative pattern supports for each test row.

        For every test example, intersect its active (==1) binary features
        with each training example's and measure how often that shared
        pattern occurs among positive and negative training rows.

        :param threshold: minimum support to keep; falls back to
            self.threshold when None.  (BUGFIX: this parameter was
            previously accepted but silently ignored in favor of
            self.threshold.)
        :param get_weights: when True, score the training split against
            itself instead of the held-out test split.
        :return: (support_pos, support_neg) — per-test-row lists (or
            filtered arrays when a threshold applies) of supports.
        """
        if threshold is None:
            threshold = self.threshold

        binarized_df = self.onehot_numerical()

        X_train, X_test = train_test_split(binarized_df, test_size=self.test_size, random_state=self.seed)

        self.train_df = X_train
        self.test_df = X_test

        if get_weights:
            X_test = deepcopy(X_train)

        X_train_np, X_test_np = np.array(X_train), np.array(X_test)

        # Split the training rows by target label once, up front.
        X_train_pos = np.array(X_train[X_train[self.target] == 1])
        X_train_neg = np.array(X_train[X_train[self.target] == 0])

        support_pos, support_neg = [], []

        for i in pb(range(len(X_test))):
            support_pos_i, support_neg_i = [], []
            # Active features of the test row (target column excluded);
            # hoisted out of the inner loop — it only depends on i.
            test_ones = np.where(X_test_np[i, :-1] == 1)[0]

            for j in range(len(X_train)):
                train_ones = np.where(X_train_np[j, :-1] == 1)[0]
                both_ones = np.intersect1d(test_ones, train_ones)

                if len(both_ones) > 0:
                    # A row "supports" the shared pattern when every shared
                    # feature is 1, i.e. the slice sums to len(both_ones).
                    X_sub_pos_sum = np.sum(X_train_pos[:, both_ones], axis=1)
                    X_sub_neg_sum = np.sum(X_train_neg[:, both_ones], axis=1)

                    X_sub_pos_chosen = X_sub_pos_sum[X_sub_pos_sum == len(both_ones)]
                    X_sub_neg_chosen = X_sub_neg_sum[X_sub_neg_sum == len(both_ones)]

                    support_pos_i.append(len(X_sub_pos_chosen) / len(X_train_pos))
                    support_neg_i.append(len(X_sub_neg_chosen) / len(X_train_neg))

            if threshold:
                # Keep only supports strictly above the threshold.
                support_pos_i = np.array(support_pos_i)
                support_neg_i = np.array(support_neg_i)
                support_pos_i = support_pos_i[support_pos_i > threshold]
                support_neg_i = support_neg_i[support_neg_i > threshold]

            support_pos.append(support_pos_i)
            support_neg.append(support_neg_i)

        return support_pos, support_neg
Ejemplo n.º 12
0
    def transform_df(self,
                     df,
                     process=False,
                     stop=False,
                     stem=False,
                     tags=False,
                     reformat='summary',
                     columns=['title', 'question', 'answers']):
        """Convenience function using dataframe with columns:
            tags (str), title (str), question (str), answers (str)
           See strmatch() for additional argument details
           If reformat = 'full', then text is preprocessed and stemmed in-place.

           NOTE(review): the mutable default `columns=[...]` is shared across
           calls; it is only read here, but confirm no caller mutates it.
        """
        # Tag string 'a|b|c' -> 'a b c'.
        if tags or "tags" in reformat or reformat == 'full':
            df['tags'] = df.tags.progress_apply(
                lambda x: ' '.join(x.split('|')))
        if process or "process" in reformat or reformat == 'full':
            for col in columns:
                df.loc[:, col] = df[col].progress_apply(self.preprocess)
        if stop or "stop" in reformat or reformat == 'full':
            # English stopwords plus domain-specific (stemmed) filler words.
            en_stop = set(stopwords.words('english'))
            en_stop.update([
                'use', 'like', 'tri', 'get', 'set', 'way', 'may', 'would',
                'could', 'might', 'also'
            ])
            for col in columns:
                df[col] = df[col].progress_apply(lambda x: ' '.join(
                    [i for i in x.split() if not i in en_stop]))
        if stem or "stem" in reformat or reformat == 'full':
            for col in columns:
                df.loc[:, col] = df[col].progress_apply(self.stem)
        if 'only' in reformat: return df
        # The text was already transformed in-place above, so strmatch()
        # must not preprocess/stem a second time.
        process = False
        stem = False

        column = []
        for i in pb(df.itertuples()):
            text = "{} {} {} {}".format(' '.join(i.tags.split('|')), i.title,
                                        i.question, i.answers)
            column.append(
                self.strmatch(text,
                              process=process,
                              stem=stem,
                              reformat=reformat))
        if reformat == 'summary' or reformat == 'full':
            # strmatch() is assumed to return a 5-tuple per row:
            # (uniq, raw, words, len, ratio) — TODO confirm against strmatch.
            df['uniq'] = [i[0] for i in column]
            df['raw'] = [i[1] for i in column]
            df['words'] = ['|'.join(i[2]) for i in column]
            df['len'] = [i[3] for i in column]
            df['ratio'] = [i[4] for i in column]
        else:
            df['strmatch'] = column
        return df
Ejemplo n.º 13
0
    def compute_eps_0(self):
        """Estimate the neighbourhood radius eps_0 for the picture set.

        For every picture, take the distance to its nearest neighbour
        (entry [1] after sorting — entry [0] is the picture itself) and
        return 1.5 times the mean of those distances; cached on self.eps_0.
        """
        if self.eps_0 is not None:
            return self.eps_0

        pics = self.get_reshaped_pics()
        nearest_neighbour_dists = []

        for idx in pb(range(self.num_pics)):
            dists = sorted(self.calc_distances(pics[:, idx, :]),
                           key=lambda pair: pair[1])
            nearest_neighbour_dists.append(dists[1][1])

        self.eps_0 = np.mean(nearest_neighbour_dists) * 1.5
        return self.eps_0
Ejemplo n.º 14
0
def psnr(root, outputdir, predict='vis'):
    """Write a '.psnr' sidecar file for every prediction in *outputdir*.

    Compares each predicted image against the matching ground-truth image
    under root/VIS (or root/IR when predict == 'ir'), pairing files by
    sorted order after excluding previously written sidecars.

    :param root: dataset root containing VIS/ and IR/ subfolders.
    :param outputdir: folder of predicted images.
    :param predict: which modality was predicted ('vis' or 'ir').
    """
    criterion = nn.MSELoss()

    d = 'VIS'
    if predict == 'ir':
        d = 'IR'
    gtdir = root + '/' + d + '/'

    # Ignore previously written .psnr sidecars when pairing files.
    filt = lambda l: sorted(filter(lambda x: not x.endswith('.psnr'), l))

    from progressbar import progressbar as pb
    for gt, pred in pb(
            zip(filt(os.listdir(gtdir)), filt(os.listdir(outputdir)))):
        pred = f'{outputdir}/{pred}'
        groundtruth = data.read(f'{gtdir}/{gt}')
        predicted = data.read(pred)

        mse = criterion(groundtruth, predicted).item()
        psnr = mse_to_psnr(mse)
        # BUGFIX: use a context manager so the sidecar file handle is
        # flushed and closed deterministically instead of leaked.
        with open(pred + '.psnr', 'w') as out:
            out.write(f'{psnr:.3f}')
Ejemplo n.º 15
0
    EC.element_to_be_clickable(
        (By.XPATH, "//span[contains(text(), 'Next')]"))).click()
# Log in to the Stack Exchange Data Explorer via the preferred login
# provider, then run a batched query for post creation dates.
driver.get("https://data.stackexchange.com/account/login")
driver.find_element_by_css_selector(
    "div[class='preferred-login']").find_element_by_css_selector(
        'span').click()

#%%#######################################################################
#                               Basic Data                               #
##########################################################################

## Read queries
# Query template; {} is filled with a comma-separated batch of post ids.
q = 'select Id as id, CreationDate as creationdate from Posts where Id IN ({})'

## Get SO (change sql when necessary)
for count, batch in pb(enumerate(so_ids)):
    driver.get("https://data.stackexchange.com/stackoverflow/query/new")
    query = q.format(','.join(batch))
    # Inject the SQL into the CodeMirror editor via JS — the underlying
    # textarea is not directly editable through Selenium.
    jscript = "document.getElementsByClassName('CodeMirror')[0].CodeMirror.setValue('{}')".format(
        query)
    driver.execute_script(jscript)

    ## Submit Query and download results
    # NOTE(review): both attribute selectors below are missing the closing
    # "']" (e.g. "button[id='submit-query"); browsers error-correct
    # unterminated selectors at end of input, but confirm this is intended.
    button = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "button[id='submit-query")))
    button.click()
    results = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "a[id='resultSetsButton")))
    results_link = results.get_attribute('href')
Ejemplo n.º 16
0
def main():
	"""Run the full traffic-sign detection pipeline over the test videos.

	FRCNN proposes boxes on the left/right halves of each frame, a hybrid
	Lucas-Kanade tracker smooths/feeds them forward, and a CNN classifies
	each sign, swapping classifier weights per detected challenge type.
	Optionally visualizes detections and writes per-video detection files.
	Python 2 code (print statements, Queue, generator .next()).
	"""
	try:
		parser = OptionParser()

		parser.add_option("-p", "--vid_path", dest="vid_fldr", help="Path to Videos to test.",default='../../../../data/Videos/All/')
		parser.add_option("--fp", "--frame_path", dest="frame_fldr", help="Path to Frames to test",default='../../../../Frames/Real/'),
		parser.add_option("-f", "--frame_gen", dest="frame_gen",help="Frame Generator Selection; if flag included, will generate frames from extracted frames", action= 'store_false', default=True)
		parser.add_option("--lp", "--label_path", dest="label_path", help="Path to GT labels (for visualization)", default='./labels/'),
		parser.add_option("-r", "--results", dest="generateCSV",help="Flag. Produces detection text files", action= 'store_false', default=True)
		parser.add_option("-v", "--visualize", dest="visualize", help="Flag. Shows preview of video and detections",action= 'store_true', default=False)
		parser.add_option("--vb", "--verbose", dest="verbose", help="Flag. Prints outputs from FRCNN,Tracker and Classifier",action= 'store_true', default=False)

		parser.add_option("-n", "--vid", dest="vidname", help=" Name of videos to test (if testing specific videos)" ,default=None)

		parser.add_option("--scaleBoxes", dest="scaleBoxes", help=" float : 0.0 -> 1.0 \n Amount boxes are scaled after detection ", default=1.0)
		parser.add_option("--showROI", dest="showROI", help="Bool. Preview detected regions in seperate window", action= 'store_true', default=False)
		parser.add_option("--part", dest="test_part", help="Generate results for part of test dataset\n  --part <integer 1-6>" ,default=None)
		parser.add_option("--continue", dest="continue_last", help="Continues result generation from video last processed; used in case code terminates prematurely",action= 'store_true' ,default=False)

		(options, args) = parser.parse_args()

		# path to video/frames
		vid_fldr = options.vid_fldr
		frame_fldr = options.frame_fldr

		if options.frame_gen and not os.path.exists(vid_fldr):
			raise Exception("Error: video directory does not exist")
		elif not options.frame_gen and not os.path.exists(frame_fldr):
			raise Exception("Error: frame directory does not exist")

		# path to labels (for visualization)
		gtlabels = options.label_path

		# flags
		verbose = options.verbose
		visualize = options.visualize
		generateCSV = options.generateCSV

		# maximum videos frames to hold in queue
		max_vid_que = 1

		# Queues (will hold 1200 frames ~ 4 videos at max.
		# Will start filling again once consumed by model)
		imgQ = Queue(maxsize = 300*max_vid_que)						# Queue for Images to be used by Sign Classifier
		imgVisualizeQ = Queue(maxsize =  300*max_vid_que)			# Queue for Images to be used by visualization code
		frcnnQL = Queue(maxsize =  300*max_vid_que)					# Queue for Images to be used by FRCNN (bounding box detector), left half of image
		frcnnQR = Queue(maxsize =  300*max_vid_que)					# Queue for Images to be used by FRCNN (bounding box detector), right half of image
		LKTrackQ = Queue(maxsize =  300*max_vid_que)
																	# Queue for Images to be used by box tracker system (Lucas Kanade/Optical Flow)
		cnnQ = Queue(maxsize =  300*max_vid_que)					# Queue for bounding boxes to be used by Sign Classifier (will get co-ordinates and crop accodingly)
		bboxQ = Queue()												# Queue for bounding boxes to be used by tracker system
		trackerOutQ = Queue()										# Queue for bounding boxes output from tracker system (used for visualization)
		cnnOutQ = Queue()											# Queue for classes output from sign Classifier (used for feedback in tracker system)
		chTrackQ = Queue()											# Queue for keeping track of challenge types (used to dyanmically change classifier model weights)

		# Initializing settings for neural networks
		frcnnSettings =  optionsFRCNN()
		cnnSettings = optionsCNN()

		print "\nInitializing Tracker... ",
		tracker = hybridTracker()
		print "Complete."

		print "\nInitializing FRCNN ... ",
		C,frcnnRPN,frcnnClass,frcnnClassOnly = setupFRCNN(frcnnSettings)
		print "Complete."

		print "\nBuilding Neural Networks ...",
		cnn = createModel(cnnSettings)
		loadWeights(cnn,cnnSettings.model_weights_default)
		print "Complete."

		test_vid_rea = [4,5,6,7,8,18,19,21,24,26,31,38,39,41,47]		# test video sequences (real videos)
		test_vid_syn = [2,4,6,9,12,13,16,17,18,20,22,28,31,32,36]		# test video sequences (synthesized videos)

		# When we generated the result, we split the test videos into 6 parts and ran it on 6 PCs
		if options.test_part is not None:
			if int(options.test_part) == 1:
				test_vid_rea = test_vid_rea[:5]				# first 5 real video sequences
				test_vid_syn = []
			elif int(options.test_part) == 2:
				test_vid_rea = test_vid_rea[5:10]			# second 5 real video sequences
				test_vid_syn = []
			elif int(options.test_part) == 3:
				test_vid_rea = test_vid_rea[10:]			# third 5 real video sequences
				test_vid_syn = []
			elif int(options.test_part) == 4:
				test_vid_rea = []
				test_vid_syn = test_vid_syn[:5]				# first 5 synthesized video sequences
			elif int(options.test_part) == 5:
				test_vid_rea = []
				test_vid_syn = test_vid_syn[5:10]			# second 5 synthesized video sequences
			elif int(options.test_part) == 6:
				test_vid_rea = []
				test_vid_syn = test_vid_syn[10:]			# third 5 synthesized video sequences


		# if testing individual videos, command line argument used
		if options.vidname is not None:
			vidname = options.vidname.split(",")

		# else all videos in test dataset processed
		else:
			vidname = []
			vidname.extend( ["01_%02d_00_00_00" %(seq) for seq in test_vid_rea]	)																# Real No Challenge
			vidname.extend( ["02_%02d_00_00_00" %(seq) for seq in test_vid_syn] )																# Syn No Challenge
			vidname.extend( ["01_%02d_01_%02d_%02d" %(seq,eff,lvl) for seq in test_vid_rea for eff in range(1,13) for lvl in range(1,6)] )		# Real Challenge
			vidname.extend( ["02_%02d_01_%02d_%02d" %(seq,eff,lvl) for seq in test_vid_syn for eff in range(1,12) for lvl in range(1,6)] )		# Syn Challenge

			print "\nGenerating Detections for all test videos (%d videos):" % len(vidname)
			print "-----------------------------------------------------------"

		# if --continue flag given, we skip videos for which detection file already generated
		if options.continue_last:
			adjusted_vidname = []
			print "\n"
			for vid in vidname:
				detection_file = './detections/' + vid + '.txt'

				if os.path.exists(detection_file):
					print "Skipping %s: Detection file already exists" %vid
					continue
				else:
					adjusted_vidname.append(vid)

			vidname = adjusted_vidname

		# frameGenerator 1 gets frames from video, 2 from extracted frames
		if options.frame_gen:
			frames = frameGenerator1(vidname,vid_fldr,classify=True)
		else:
			frames = frameGenerator2(vidname,frame_fldr,classify=True)

		# need to iterate generator once before handing over to another thread
		# this is because of peculiar way keras's  model.predict() behaves when multi-threading
		overlap = 70
		ch_type,img = frames.next()
		imgQ.put(img)
		imgVisualizeQ.put(img)
		frcnnQL.put(img[0:660,0:814+overlap/2])
		frcnnQR.put(img[0:660,814-overlap/2:1627])
		LKTrackQ.put([ch_type,cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)])

		# thread to feed get frames,preprocess and feed queues
		t1 = threading.Thread(target=populateQ,args=(frames,imgQ,imgVisualizeQ,frcnnQL,frcnnQR,LKTrackQ,chTrackQ,visualize,))
		t1.setDaemon(True)
		t1.start()

		# thread for tracker module
		t2 = threading.Thread(target=tracker.track,args=(LKTrackQ,cnnOutQ,bboxQ,cnnQ,trackerOutQ,))
		t2.setDaemon(True)
		t2.start()

		# Setting up generator functions
		Right = findBBox(frcnnQR,'R',C,frcnnSettings,frcnnRPN,frcnnClassOnly)				# bounding box detector (right side of frames)
		Left = findBBox(frcnnQL,'L',C,frcnnSettings,frcnnRPN,frcnnClassOnly)				# bounding box decetor  (left side of frames)
		Classify = classifySign(cnn,cnnQ,cnnOutQ,imgQ,cnnSettings,class_map=None,scale=float(options.scaleBoxes),showROI=options.showROI)		# sign classifier


		for vid in vidname:
			try:
				print "\n\n---------------------------------"
				print "Processing:\t %s" % vid
				print "---------------------------------\n"

				if not verbose:
					bar = pb(max_value = 300)		# progress bar

				# loading labels to plot ground truth
				if visualize:
					try:
						labels = np.genfromtxt(gtlabels + vid[:5] +'.txt',delimiter='_')
						fnum = labels[:,0]
						gtbox = np.hstack((labels[:,2:4],labels[:,8:10]))
					except:
						print "\nError Loading Labels: please check label folder directory"
						print "Continuing without visualization\n"
						visualize = False
						pass

				# creating files to store detections
				if generateCSV:
					if not os.path.exists('./detections/'):
						os.makedirs('./detections/')

					detection_file = './detections/' + vid + '.txt'

					f = open( detection_file,'w')
					f.write("frameNumber_signType_llx_lly_lrx_lry_ulx_uly_urx_ury\n")


				# wait for queue to populate
				while frcnnQR.empty():
					continue

				idx = 0		# frame index

				# keeping track of challenge type/enviroment conditions
				prev_chType = 'NoCh'

				# each video is assumed to be exactly 300 frames long
				while idx<300:

					idx +=1

					# checking challenge type every 7 frames and changing classifier model weights for particular challenget type
					if idx % 7 == 0:
						curr_chType = chTrackQ.get()

						if prev_chType != curr_chType:
							loadWeights(cnn,cnnSettings.model_weights_effects[curr_chType])

							if verbose:
								print "\n---------------------------------------"
								print "Changing Classifier Model to : %s " % curr_chType
								print "---------------------------------------\n"

						prev_chType = curr_chType

					# bbox array will contain bounding boxes detected by FRCNN
					bbox = []

					# combining co-ordinates of boxes from left and right side of image
					bbox.extend(Left.next())
					bbox.extend(Right.next())

					# passsing boxex to tracker module through queue
					bboxQ.put(bbox)

					# classifier gets boxes from tracker module through another queue and returns box classes
					y = Classify.next()

					# boxes forwared to classifier by tracker (used for visualization and writing to file)
					bboxT = trackerOutQ.get().tolist()
					trackerOutQ.task_done()

					if verbose:
						print "Frame %03d: BBoxes :\t"%idx,
						print bboxT
						print "            Class :\t",
						print y
						print"\n"

					if visualize:
						img = imgVisualizeQ.get()
						overlay = img.copy()
						cv2.rectangle(overlay,(5,5),(330,70),(10,10,10),-1)
						cv2.putText(overlay,"Frame : %03d"%idx ,org = (10,50),fontFace =cv2.FONT_HERSHEY_SIMPLEX,color=(255,255,20),thickness=4,fontScale=1.5)

						# plotting Tracker output (Green)
						for sign,box in enumerate(bboxT):
							try:
								box = scaleBoxes(box,float(options.scaleBoxes))
								cv2.rectangle(overlay,(box[0],box[1]),(box[2],box[3]),(0,255,0),4)
								if y is not []:
									cv2.rectangle(overlay,(box[0]-10,box[3]+38),(box[0]+138,box[3]+3),(10,10,10),-1)
									cv2.putText(overlay,"Class: %d"%(y[sign]),org=(box[0],box[3]+30),fontFace=cv2.FONT_HERSHEY_SIMPLEX,color=(255,255,20),thickness=2,fontScale=0.9)
							except:
								continue


						# plotting GT boxes (Blue)
						try:
							for box in np.int16(gtbox[np.where(fnum==idx)]):
								box = scaleBoxes(box,float(options.scaleBoxes))
								cv2.rectangle(overlay,(box[0],box[1]),(box[2],box[3]),(255,0,0),2)
						except:
							pass

						cv2.addWeighted(overlay,0.7,img,0.3,0,img)

						img = cv2.resize(img,(814,618),interpolation=cv2.INTER_CUBIC)
						cv2.imshow('img',img)
						cv2.waitKey(1)
						imgVisualizeQ.task_done()

					if generateCSV:
						formatResults(f,idx,bboxT,y)		# writing to file

					if not verbose:
						bar.update(idx)						# updating progress bar

					if idx == 300:
						break

				if generateCSV:
					f.close()

			except StopIteration:
				if generateCSV:
					f.close()

	except Exception as e:
		print "\nExiting : %s \n" %e
		if generateCSV:
			f.close()
			os.remove(detection_file)		# removing last opened file

	except KeyboardInterrupt:
		print "\n---------------\nUser Stopped the Program.\n"
		if generateCSV:
			f.close()
			os.remove(detection_file)		# removing last opened file
Ejemplo n.º 17
0
    def compute_eps_1(self, path_to_nofaces=None):
        """Estimate the face/non-face reconstruction-error threshold eps_1.

        Projects a folder of non-face images onto the face subspace,
        reconstructs them, and returns the mean per-channel reconstruction
        error; the result is cached on self.eps_1.

        :param path_to_nofaces: directory tree scanned for '.jpg' images.
        """

        if self.eps_1 is not None:
            return self.eps_1

        path_list = []

        # Collect every .jpg under the non-faces directory tree.
        for path, subdirs, files in os.walk(path_to_nofaces):
            for name in files:
                if name.endswith('.jpg'):
                    path_list.append(os.path.join(path, name))

        pics_list = []

        for path in path_list:
            pic = Image.open(path)
            # resizing here is acceptable, by the way
            pic = pic.resize((self.pic_size, self.pic_size))
            pics_list.append(np.array(pic))
            pic.close()

        # Flatten each image into columns: (pic_size^2, n_pics, 3 channels).
        pics_list = np.array(pics_list)
        pics_reshaped = np.zeros((self.pic_size**2, len(pics_list), 3))

        for i, pic in enumerate(pics_list):
            for j in range(3):
                pics_reshaped[:, i, j] = pic[:, :, j].reshape(-1, 1)[:, 0]

        eps_1_vals = []

        # Lazily fetch the cached subspace projections, SVD and mean image.
        projections = self.get_subspace_pics(
        ) if self.subspace_pics is None else self.subspace_pics
        r, g, b = self.compute_svd() if self.svd is None else self.svd
        mean_pic = self.get_mean_pic(
        ) if self.mean_pic is None else self.mean_pic
        dim = projections.shape[0]

        for i in pb(range(len(pics_list))):

            # Project the mean-subtracted channel onto the first `dim`
            # left singular vectors (r/g/b[0] is the U matrix).
            pic_projection_r = r[0][:, :dim].T @ (
                pics_reshaped[:, i, :][:, 0] - mean_pic[:, 0])
            pic_projection_g = g[0][:, :dim].T @ (
                pics_reshaped[:, i, :][:, 1] - mean_pic[:, 1])
            pic_projection_b = b[0][:, :dim].T @ (
                pics_reshaped[:, i, :][:, 2] - mean_pic[:, 2])

            # Reconstruct each channel from its projection...
            f_p_r = r[0][:, :dim] @ pic_projection_r
            f_p_g = g[0][:, :dim] @ pic_projection_g
            f_p_b = b[0][:, :dim] @ pic_projection_b

            # ...and measure the residual reconstruction error per channel.
            e_f_r = np.linalg.norm((pics_reshaped[:, i, :][:, 0] -
                                    mean_pic[:, 0]) - f_p_r)
            e_f_g = np.linalg.norm((pics_reshaped[:, i, :][:, 1] -
                                    mean_pic[:, 1]) - f_p_g)
            e_f_b = np.linalg.norm((pics_reshaped[:, i, :][:, 2] -
                                    mean_pic[:, 2]) - f_p_b)

            eps_1_vals.append(np.mean([e_f_r, e_f_g, e_f_b]))

        eps_1 = np.mean(eps_1_vals)

        self.eps_1 = eps_1

        return eps_1
Ejemplo n.º 18
0
""" UpstreamCounts.py

Script to get the actual number of A's, C's, T's, and G's upstream of genes
"""
import pandas as pd
from Bio import SeqIO
from OrderedSeqRec import OrderedSeqRecord
from sys import argv
from collections import Counter
from progressbar import ProgressBar as pb


if __name__ == "__main__":
    coords = pd.read_table(argv[1])
    seqs = {rec.id: OrderedSeqRecord(rec) for rec in  SeqIO.parse(argv[2], 'fasta')}
    counts = Counter()
    for ix in pb()(coords.index):
        row = coords.ix[ix]
        counts.update(seqs[row.chrom][row.max_upstream:row.tss])

    print(counts)


        'max_width': 880,
        'progress_bar': False,
        'split_columns': True,
        'total_width': 200,
        'nan_replace' : 0.5,
        'vspacer': 0}

    diffs = set()
    for gene, diff in (female_logistic_r2 - male_logistic_r2).items():
        if female_logistic_r2[gene] > .4 and abs(diff) > .25:
            diffs.add(gene)
    for gene, diff in (female_peak_r2 - male_peak_r2).items():
        if female_peak_r2[gene] > .4 and abs(diff) > .25:
            diffs.add(gene)

    for gene in pb()(diffs):
        pu.svg_heatmap(
            (
                None, expr_females.ix[[gene]],
                None, ase_females.ix[gene],
                None, ase_males.select(**sel_startswith('melXsim')).ix[gene],
                None, ase_males.select(**sel_startswith('simXmel')).ix[gene],
                None, expr_males.select(**sel_startswith('melXsim')).ix[[gene]],
                None, expr_males.select(**sel_startswith('simXmel')).ix[[gene]],
            ),
            'analysis_godot/results/sex_diff/{}.svg'.format(gene),
            norm_rows_by=(
                'female expression', 'max',
                'females - L{:.03f} P{:.03f}'.format(female_logistic_r2[gene],
                                                     female_peak_r2[gene]),
                'center0pre',
Ejemplo n.º 20
0
    return phi_c


# Resolution of the magnitude grid and the sampled magnitude axis.
RES = 1000
x = np.linspace(-3.5, 1.0, RES)


def Phi(M, typ):
    """Return the MDF-weighted luminosity function at magnitude ``M``.

    Integrates ``phi(M, z) * MDF(z)`` over ``z`` with the trapezoidal rule
    for the single branch type ``typ``.
    """
    # NOTE(review): an earlier version used iso.zs as the grid; the fixed
    # linspace below was the bug fix noted in the original.
    grid = np.linspace(-100.0, 100.0, 1000)
    lum = np.array([phi(M, z, [typ]) for z in grid])
    weights = np.array([MDF(z) for z in grid])
    return trapz(lum * weights, grid)


plt.figure()
plt.xlabel("Magnitude")
plt.ylabel("Luminosity Function (Arbitrary Units)")

# Evaluate and plot the luminosity function for each branch type, saving the
# curves to disk; redirect_stdout keeps print() from garbling the progress bar.
for i in pb([1, 2, 3], redirect_stdout=True):
    # Smoothing and plotting
    ys = np.array([Phi(M, i) for M in x])
    np.save("Results/SALF/xs_t" + str(i), x)
    np.save("Results/SALF/ys_t" + str(i), ys)
    plt.plot(x, ys)
    print("Done Branch ", i)

plt.show()
Ejemplo n.º 21
0
# NOTE(review): `parser` is created earlier in this script, outside this excerpt.
parser.add_argument('species2', type=str,
        help='The second species (e.g. "sim")')

args = parser.parse_args()

# qsub job template; per-chromosome-pair values are format()-ed in below.
script = open('qsub_base.sh').read()

species1_gtf = open('Reference/{}_good.gtf'.format(args.species1))
species2_gtf = open('Reference/{}_good.gtf'.format(args.species2))

# The first whitespace-separated field of each GTF line is the chromosome name.
species1_chroms = {line.split()[0] for line in species1_gtf}
species2_chroms = {line.split()[0] for line in species2_gtf}

print(species1_chroms, species2_chroms)
items = list(itertools.product(species1_chroms, species2_chroms))

# Submit one qsub job per chromosome pair, feeding the filled-in template on
# stdin; communicate() writes the script and waits for the qsub process.
jobs = []
for id1, id2 in pb()(items):
    job = script.format(
            job_name=id1+'_'+id2,
            id1 = id1,
            id2 = id2, 
            species1=args.species1,
            species2=args.species2)
    jobs.append(Popen(['qsub'], stdin=PIPE))
    jobs[-1].communicate(bytes(job, 'ASCII'))

# communicate() already waited, so this is a harmless final sanity check.
for job in jobs:
    job.wait()

        'progress_bar': False,
        'split_columns': True,
        'total_width': 200,
        'nan_replace': 0.5,
        'vspacer': 0
    }

    diffs = set()
    for gene, diff in (female_logistic_r2 - male_logistic_r2).items():
        if female_logistic_r2[gene] > .4 and abs(diff) > .25:
            diffs.add(gene)
    for gene, diff in (female_peak_r2 - male_peak_r2).items():
        if female_peak_r2[gene] > .4 and abs(diff) > .25:
            diffs.add(gene)

    for gene in pb()(diffs):
        pu.svg_heatmap(
            (
                None,
                expr_females.ix[[gene]],
                None,
                ase_females.ix[gene],
                None,
                ase_males.select(**sel_startswith('melXsim')).ix[gene],
                None,
                ase_males.select(**sel_startswith('simXmel')).ix[gene],
                None,
                expr_males.select(**sel_startswith('melXsim')).ix[[gene]],
                None,
                expr_males.select(**sel_startswith('simXmel')).ix[[gene]],
            ),
    mel = expr.select(**ut.sel_startswith('melXmel_'))
    sim = expr.select(**ut.sel_startswith('simXsim_'))
    hyb = expr.select(**ut.sel_startswith(('melXsim', 'simXmel')))
    expr_in_mel = (mel.max(axis=1) > EXPR_MIN)
    expr_in_sim = sim.max(axis=1) > EXPR_MIN
    expr_in_hybrids = (hyb.max(axis=1) > EXPR_MIN)
    expr_in_all = (expr_in_mel & expr_in_sim & expr_in_hybrids)

    expr = expr.ix[expr_in_all]

    embryo_types = {c.split('_sl')[0].split('_rep')[0] for c in expr.columns}
    embryos = {}
    for etype in embryo_types:
        embryos[etype] = {
            c.split('_sl')[0]
            for c in expr.columns if c.startswith(etype)
        }

    combs = sum([sorted(it.combinations(e, 2)) for e in embryos.values()], [])
    combs += list(
        it.product(embryos['melXsim_cyc14C'], embryos['simXmel_cyc14C']))
    emds = pd.DataFrame(index=expr.index,
                        columns=["{}-{}".format(*c) for c in combs],
                        data=-1)
    for gene in pb()(expr.index):
        for e1, e2 in combs:
            emds.ix[gene, "{}-{}".format(e1, e2)] = (dd.earth_mover_multi_rep(
                expr.ix[gene].select(ut.startswith(e1)) + EXPR_MIN,
                expr.ix[gene].select(ut.startswith(e2)) + EXPR_MIN,
            ))
Ejemplo n.º 24
0
    def fit(self,
            train_data,
            valid_data=None,
            model_path=None,
            epochs=1,
            log_file=None,
            callbacks=None):
        """Train ``self.model`` on ``train_data`` for ``epochs`` epochs.

        Args:
            train_data: ``torch.utils.data.DataLoader`` yielding (x, y) batches.
            valid_data: optional DataLoader; when given, a validation pass is
                run (and logged) after every epoch.
            model_path: if set and no Checkpoint callback is present, the full
                model is saved here after each epoch.
            epochs: number of passes over ``train_data``.
            log_file: forwarded to ``self.write_log``.
            callbacks: optional list of callback objects. ``None`` means no
                callbacks -- the original mutable default ``[]`` was shared
                across calls (classic mutable-default-argument bug).
        """
        callbacks = [] if callbacks is None else callbacks
        assert isinstance(train_data, torch.utils.data.DataLoader)

        self.write_log(log_file, time.asctime())
        self.init_callbacks(callbacks)

        # Stays None when no validation loader is supplied; used to pick the
        # statistics handed to the epoch-end callbacks below.
        valid_statics = None
        for epo in range(epochs):

            for cbk in callbacks:  #TODO: the parameters will be determined later
                if hasattr(cbk, 'on_epoch_begin'):
                    cbk.on_epoch_begin()

            self.write_log(log_file, 'Epoch {}/{}'.format(epo + 1, epochs))
            self.model.train()
            history = []
            for x, y in pb(train_data):
                if x is None:
                    continue
                self.optimizer.zero_grad()
                record = []
                # Input transforms are not part of the computation graph.
                if self.transform:
                    with torch.no_grad():
                        x = self.transform(x)
                pred = self.model(x)
                loss = self.loss(pred, y)
                loss.backward()
                self.optimizer.step()
                record.append(loss.item())

                # eval metrics
                for metric in self.metrics:
                    metric_value = metric(pred, y)
                    record.append(metric_value)

                history.append(record)

            # Column-wise means over the epoch: [loss, metric1, metric2, ...]
            statics = list(map(np.mean, list(zip(*history))))
            loss_tip = "train loss is {:.4f}".format(statics[0])
            metric_tips = list(
                map(", {} is {:.4f}".format, self.metrics_name, statics[1:]))
            tip = "".join([loss_tip, *metric_tips])
            self.write_log(log_file, tip)

            if isinstance(valid_data, torch.utils.data.DataLoader):
                # Validation pass (no gradients, eval mode).
                self.model.eval()
                valid_history = []
                with torch.no_grad():
                    for x, y in valid_data:
                        if x is None:
                            continue
                        valid_record = []
                        if self.transform:
                            x = self.transform(x)

                        pred = self.model(x)
                        loss = self.loss(pred, y)
                        valid_record.append(loss.item())

                        for metric in self.metrics:
                            metric_value = metric(pred, y)
                            valid_record.append(metric_value)
                        valid_history.append(valid_record)

                valid_statics = list(map(np.mean, list(zip(*valid_history))))
                loss_tip = "valid loss is {:.4f}".format(valid_statics[0])
                metric_tips = list(
                    map(", {} is {:.4f}".format, self.metrics_name,
                        valid_statics[1:]))
                tip = "".join([loss_tip, *metric_tips])
                self.write_log(log_file, tip)

            # Only auto-save when the user did not install a Checkpoint
            # callback (which handles saving itself).
            if True not in list(
                    map(lambda x: isinstance(x, Checkpoint), callbacks)):
                if model_path:
                    torch.save(self.model, model_path)

            # Fall back to training statistics when there was no validation
            # pass; the original raised NameError on valid_statics here.
            monitor = valid_statics if valid_statics is not None else statics
            for cbk in callbacks:
                if hasattr(cbk, 'on_epoch_end'):
                    if isinstance(cbk, LearningRateDecay):
                        cbk.on_epoch_end(epo, monitor[0])
                    elif isinstance(cbk, Checkpoint):
                        cbk.on_epoch_end(epo, monitor[0],
                                         monitor[1:], self.model)
                    else:
                        cbk.on_epoch_end()
Ejemplo n.º 25
0
        chrom = entry[0]
        if entry[2] != 'exon': continue
        start = int(entry[3])
        stop = int(entry[4])
        for i in range(start, stop):
            exons[(chrom, i)] = True
        if i%1e4 == 0:
            print('.', end='')
            stdout.flush()
    print("Done loading exons")

    snp_rates = {}
    len_regions_dict = {}
    num_snps_dict = {}

    for gene in pb()(peak.index.union(logist.index)):
        tss = tsss.ix[gene]
        len_regions = 0
        num_snps = 0
        seen_dnase = set()
        if isinstance(tss, pd.Series):
            #print("Single CRM for gene", gene)
            num_snps, len_regions = get_snps_and_len(tss, dnase, snps, set(), exons)
        elif isinstance(tss, pd.DataFrame):
            #print("Multiple CRMs for gene", gene)
            for i, tss in tss.iterrows():
                n, l = get_snps_and_len(tss, dnase, snps, seen_dnase, exons)
                len_regions += l
                num_snps += n
        else:
            assert False
Ejemplo n.º 26
0
def generate_bar(ln, text, redirect_stdout=False):
    """Build a ProgressBar of length ``ln`` labelled with ``text``.

    Args:
        ln: maximum value of the bar.
        text: label rendered in front of the standard widgets.
        redirect_stdout: forwarded to ProgressBar so prints made while the
            bar runs do not garble it. Defaults to False, so existing
            callers are unaffected; this matches the other generate_bar
            variant in this codebase, which already accepts the flag.
    """
    return pb(min_value=0,
              max_value=ln,
              widgets=[FormatLabel(text)] + base_widgets,
              redirect_stdout=redirect_stdout)
Ejemplo n.º 27
0
def main():
    """Run detection inference with a trained RetinaNet and visualize results.

    Parses command-line options, builds the evaluation dataset/dataloader,
    loads the requested checkpoint, reports per-class AP and mAP, then draws
    ground-truth and predicted boxes on each image and saves them as PNGs.
    """
    from train import default_rd
    # --------------------------- command-line arguments ---------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'model', help='model 所在的文件夹'
    )
    parser.add_argument(
        '-bs', '--batch_size', default=2, type=int, help='batch size,默认是2')
    parser.add_argument(
        '-nj', '--n_jobs', default=6, type=int, help='多核并行的核数,默认是6')
    parser.add_argument(
        '-is', '--input_size', default=(960, 600), type=int, nargs=2,
        help=(
            '默认是960, 600(是用于All的detection),因为增加了对于TCT的支持,'
            '这里不再能够使用int和None,必须指定2个'
        )
    )
    parser.add_argument(
        '-rs', '--random_seed', default=1234, type=int,
        help='随机种子数,默认是1234'
    )
    parser.add_argument(
        '-rd', '--root_dir', default=default_rd(),
        help=(
            '数据集所在的根目录,其内部是子文件夹储存图片, 这里是'
            '和system的类型有关,默认是ALL的数据')
    )
    parser.add_argument(
        '--no_normalize', action='store_false',
        help='是否对数据进行标准化,如果使用该参数则不进行标准化'
    )
    parser.add_argument(
        '-bb', '--backbone', default='resnet50',
        help='使用的backbone网络,默认是resnet50'
    )
    parser.add_argument(
        '-ps', default=[3, 4, 5, 6, 7], type=int, nargs='+',
        help='使用FPN中的哪些特征图来构建anchors,默认是p3-p7'
    )
    parser.add_argument(
        '-ph', '--phase', default='valid',
        choices=['all', 'train', 'valid', 'test'],
        help='是对全部的数据集做还是对根据seed分出来的test或valid做,默认是valid'
    )
    parser.add_argument(
        '-sd', '--save_dir', default='valid',
        help=(
            "会创建一个文件夹在模型所在的目录中,输出的图像都保存在其中,默认"
            "名称是valid"
        )
    )
    parser.add_argument(
        '-sr', '--save_root', default='./ALLresults',
        help='结果保存的根目录,默认是./ALLresults'
    )
    parser.add_argument(
        '--top_k', default=0, type=int,
        help="使用的是排名第几的模型进行预测,默认是0"
    )
    parser.add_argument(
        '--wh_min', default=None, type=int,
        help="默认是None,用于xml读取,过滤错误的框"
    )
    parser.add_argument(
        '-nt', '--nms_thre', default=0.3, type=float,
        help="进行nms时使用的阈值,默认是0.3"
    )
    parser.add_argument(
        '--custom_label_mapper', action='store_true',
        help="如果使用此参数,将把得到的标签列出,并自己给出起数字标签是什么"
    )
    args = parser.parse_args()

    # --------------------------- collect file names ---------------------------
    data_df, label_set = get_data_df(
        args.root_dir, check=False, check_labels=True)
    if args.custom_label_mapper:
        # Interactively ask the user for a numeric id per label.
        label_mapper = {}
        for l in label_set:
            i = int(input('label--%s的数字标签是:' % l))
            label_mapper[l] = i
    else:
        label_mapper = {l: i for i, l in enumerate(label_set)}
    num_classes = len(set(label_mapper.values()))
    print(label_mapper)
    # Dataset split: 90/10 train+valid vs test, then 8/1 train vs valid,
    # reproducible via random_seed.
    if args.phase in ['train', 'valid', 'test']:
        trainval_df, test_df = train_test_split(
            data_df, test_size=0.1, shuffle=True,
            random_state=args.random_seed
        )
        if args.phase == 'test':
            use_dat = test_df
        else:
            train_df, valid_df = train_test_split(
                trainval_df, test_size=1/9, shuffle=True,
                random_state=args.random_seed
            )
            if args.phase == 'train':
                use_dat = train_df
            else:
                use_dat = valid_df
    else:
        use_dat = data_df

    # --------------------------- build the dataset ---------------------------
    img_transfer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    img_transfer = transfers.OnlyImage(img_transfer)
    resize_transfer = transfers.Resize(args.input_size)
    test_transfers = transforms.Compose([
        resize_transfer, img_transfer
    ])
    y_encoder_args = {
        'input_size': args.input_size, 'ps': args.ps,
        'nms_thre': args.nms_thre}
    xml_parse = {}
    if args.wh_min is not None:
        xml_parse['wh_min'] = args.wh_min
    use_data = ColabeledDataset(
        use_dat, transfer=test_transfers, y_encoder_mode='object',
        label_mapper=label_mapper, xml_parse=xml_parse, **y_encoder_args
    )
    use_dataloader = DataLoader(
        use_data, batch_size=args.batch_size, shuffle=False,
        num_workers=args.n_jobs, collate_fn=use_data.collate_fn)

    # --------------------------- load the trained model ---------------------------
    # NOTE(review): only resnet50/resnet101 are handled; any other --backbone
    # value leaves `backbone` unbound and raises NameError below.
    if args.backbone == 'resnet50':
        backbone = models.resnet50
    elif args.backbone == 'resnet101':
        backbone = models.resnet101
    net = RetinaNet(backbone=backbone, ps=args.ps, num_class=num_classes)
    bests = torch.load(
        os.path.join(args.save_root, args.model, 'model.pth')
    )
    # `bests` holds ranked checkpoints; --top_k selects which one to use.
    state_dict = bests[args.top_k]['model_wts']
    net.load_state_dict(state_dict)
    net.eval()

    # --------------------------- prediction ---------------------------
    (labels_preds, markers_preds), (APs, mAP_score) = test(
        net.cuda(), use_dataloader, evaluate=True, predict=True,
        device=torch.device('cuda:0'), num_class=num_classes
    )
    for k, v in label_mapper.items():
        print('%s的AP是%.4f' % (k, APs[v]))
    print('mAP是%.4f' % mAP_score)

    # --------------------------- visualization ---------------------------
    # Create the output folder for the rendered images.
    save_dir = os.path.join(args.save_root, args.model, args.save_dir)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # Invert the preprocessing normalization to recover viewable images.
    to_pil = transforms.ToPILImage()
    no_norm = NoNormalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    # Walk the predictions and draw ground-truth and predicted boxes.
    true_color_mapper = {
        i: tuple(TrueColorList[i]) for i in range(num_classes)}
    pred_color_mapper = {
        i: tuple(PredColorList[i]) for i in range(num_classes)}
    for j, ((imgs, labels, markers), labels_pred, markers_pred) in pb(
        enumerate(zip(use_dataloader, labels_preds, markers_preds))
    ):
        imgs = no_norm(imgs)
        for i, (img, label, marker, label_pred, marker_pred) in enumerate(
            zip(imgs, labels, markers, labels_pred, markers_pred)
        ):
            img = to_pil(img)
            # Ground-truth boxes first, in the "true" color palette.
            img = draw_rectangle(
                img, label.tolist(), marker.tolist(),
                color_mapper=true_color_mapper
            )
            if label_pred.numel() == 0:
                # No detections: save the image with ground truth only.
                img.save(
                    os.path.join(
                        save_dir, str(j*args.batch_size+i) + '.png'
                    )
                )
            else:
                # Per-box class = argmax over class scores; show the score too.
                proba, idx_pred = label_pred.max(dim=1)
                img = draw_rectangle(
                    img, idx_pred.cpu().numpy(), marker_pred.cpu().numpy(),
                    color_mapper=pred_color_mapper,
                    fonts=proba.cpu().numpy().round(4)
                )
                img.save(
                    os.path.join(save_dir, str(j*args.batch_size+i) + '.png'))
Ejemplo n.º 28
0
def run_raytrace(TheSystem, hx_arr, hy_arr, nsur, configurationRange):
    """Batch-raytrace field points through each system configuration.

    Args:
        TheSystem: Zemax OpticStudio system COM object.
        hx_arr, hy_arr: arrays of normalized field coordinates (same length).
        nsur: surface number at which the rays are evaluated.
        configurationRange: (first, last) configuration numbers, inclusive.

    Returns:
        A list of pandas DataFrames, one per configuration, holding image-plane
        positions, direction cosines and error/vignette codes for each ray,
        filtered to radial position r < 85.

    NOTE(review): depends on module globals ``pxpys``, ``constants``,
    ``CastTo`` and ``max_field`` defined elsewhere in the file -- verify they
    are in scope before calling.
    """
    dfs = []
    # Initialize x/y image plane arrays
    x_ary = np.empty([len(pxpys) * len(hx_arr)
                      ])  # center field +4 extreme fields
    y_ary = np.empty([len(pxpys) * len(hy_arr)])

    error_code = np.empty([len(pxpys) * len(hy_arr)], dtype=np.int32)
    vignette_code = np.empty([len(pxpys) * len(hy_arr)], dtype=np.int32)
    # Direction cosines of each traced ray.
    l = np.empty([len(pxpys) * len(hy_arr)], dtype=np.float32)
    m = np.empty([len(pxpys) * len(hy_arr)], dtype=np.float32)
    n = np.empty([len(pxpys) * len(hy_arr)], dtype=np.float32)

    # Echo of the pupil/field coordinates that produced each ray.
    px_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    py_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    hx_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)
    hy_output = np.empty([len(pxpys) * len(hx_arr)], dtype=np.float32)

    # Adding Rays to Batch, varying normalised object height hy

    assert len(configurationRange) == 2
    for configurationNumber in pb(
            range(configurationRange[0], configurationRange[1] + 1)):
        TheSystem.MCE.SetCurrentConfiguration(configurationNumber)

        TheSystem.SystemData.Fields.SetVignetting()

        # One batch raytrace tool per configuration; closed at loop end.
        raytrace = TheSystem.Tools.OpenBatchRayTrace()
        normUnPolData = raytrace.CreateNormUnpol(
            len(hx_arr) * len(pxpys), constants.RaysType_Real, nsur)

        normUnPolData.ClearData()
        waveNumber = 1

        # Queue one ray per (pupil point, field point) combination.
        ray_counter = 0
        for pxpy in pxpys:
            px, py = pxpy
            for j in range(len(hx_arr)):
                px_output[ray_counter], py_output[ray_counter] = px, py
                hx_output[ray_counter], hy_output[ray_counter] = hx_arr[
                    j], hy_arr[j]
                normUnPolData.AddRay(waveNumber, hx_arr[j], hy_arr[j], px, py,
                                     constants.OPDMode_None)
                ray_counter += 1
                #! [e22s04_py]

        print('running raytrace...')
        baseTool = CastTo(raytrace, 'ISystemTool')
        baseTool.RunAndWaitForCompletion()

        # Drain results in queue order; output[0] is the success flag.
        normUnPolData.StartReadingResults()
        output = normUnPolData.ReadNextResult()

        j = 0
        while output[0]:  # success
            error_code[j] = output[2]
            vignette_code[j] = output[3]
            x_ary[j] = output[4]  # X
            y_ary[j] = output[5]  # Y
            l[j] = output[7]
            m[j] = output[8]
            n[j] = output[9]
            output = normUnPolData.ReadNextResult()
            j += 1

        # Convert normalized field coordinates to degrees.
        hx_deg = max_field * hx_output
        hy_deg = max_field * hy_output

        package = {
            'hx_deg': hx_deg,
            'hy_deg': hy_deg,
            'x_pos': x_ary,
            'y_pos': y_ary,
            'px': px_output,
            'py': py_output,
            'error_code': error_code,
            'vignette_code': vignette_code,
            'l': l,
            'm': m,
            'n': n
        }
        df = pd.DataFrame(package)  # end extracting rays
        # Keep only rays landing within radius 85 of the image-plane origin.
        r = np.sqrt(df['x_pos'].values**2 + df['y_pos'].values**2)
        sel = r < 85

        dfs.append(df.iloc[sel])
        baseTool.Close()
    return dfs
Ejemplo n.º 29
0
    by_emb_sums = pd.DataFrame(index=good_ase.index, columns=sorted(embs))
    for emb in embs:
        by_emb_sums.ix[:, emb] = good_ase.T.select(ut.startswith(emb)).sum()

    rando_sums = {
        emb: pd.DataFrame(index=good_ase.index, columns=np.arange(N_SHUFFLES),
                          data=np.nan)
        for emb in embs
    }
    print("Randomizing", file=sys.stderr)
    with Pool() as p:
        results = []
        for i in range(N_SHUFFLES):
            results.append(p.apply_async(shuffle_ase, (good_ase, embs)))

        for i in pb()(list(range(N_SHUFFLES))):
            result = results[i].get()
            for emb in result:
                rando_sums[emb].ix[:, i] = result[emb]

    pvals_by_emb = pd.DataFrame(index=good_ase.index, columns=sorted(embs),
                                dtype=float)
    for gene in pb()(pvals_by_emb.index):
        for emb in pvals_by_emb.columns:
            pvals_by_emb.ix[gene, emb] = (
                (
                    bisect(
                        sorted(rando_sums[emb].ix[gene]),
                        by_emb_sums[emb].ix[gene]
                    )
                )
Ejemplo n.º 30
0
""" UpstreamCounts.py

Script to get the actual number of A's, C's, T's, and G's upstream of genes
"""
import pandas as pd
from Bio import SeqIO
from OrderedSeqRec import OrderedSeqRecord
from sys import argv
from collections import Counter
from progressbar import ProgressBar as pb

if __name__ == "__main__":
    coords = pd.read_table(argv[1])
    seqs = {
        rec.id: OrderedSeqRecord(rec)
        for rec in SeqIO.parse(argv[2], 'fasta')
    }
    counts = Counter()
    for ix in pb()(coords.index):
        row = coords.ix[ix]
        counts.update(seqs[row.chrom][row.max_upstream:row.tss])

    print(counts)
Ejemplo n.º 31
0
def train(root,
          invis='fusion20',
          outvis='fusion20',
          inir='fusion',
          outir='fusion',
          predict='vis',
          sigma=0,
          cropsize=200,
          inchannels=1,
          outchannels=1,
          preload=None,
          saveto=None,
          batchsize=20,
          lr=1e-3,
          nbepochs=60):
    """Train a DnCNN to map noisy VIS/IR frame pairs to their targets.

    Builds train/val datasets from the folder layout under ``root`` (input
    folders are derived by substituting ``invis``/``inir`` etc. into paths
    containing 'fusion'), trains with Adam + MSE, logs validation loss per
    epoch, and checkpoints the model after every epoch when ``saveto`` is set.

    NOTE(review): relies on names imported elsewhere in this file (DnCNN,
    get_folders, AugmentedData, ConcatDataset, TwoFramesDataset,
    LayerConcatDataset, FolderFusionDataset, dataaugmenter, dataaugmenter2,
    write_tensor, device) -- this function must be run with the full module.
    """
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    net = DnCNN(inchannels=inchannels, outchannels=outchannels)
    net.init_weights()
    if preload:
        net.load_state_dict(torch.load(preload))
    net.cuda()

    folders = get_folders(os.path.join(root, 'fusion'))
    # Training set: every folder except the first (held out for validation).
    da = dataaugmenter(sigma, cropsize, inchannels, outchannels)
    ds_train = AugmentedData(
        ConcatDataset(
            TwoFramesDataset(
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', invis) + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', inir) + '/IR'),
                ],
                                   roll=0 if predict == 'vis' else 1),
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', outvis) + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', outir) + '/IR'),
                ],
                                   roll=0 if predict == 'vis' else 1),
            ) for folder in folders[1:]), da)

    # Validation set: the first folder only, with the eval-time augmenter.
    da = dataaugmenter2(inchannels, outchannels)
    ds_val = AugmentedData(
        ConcatDataset(
            TwoFramesDataset(
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', invis) + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', inir) + '/IR'),
                ],
                                   roll=0 if predict == 'vis' else 1),
                LayerConcatDataset([
                    FolderFusionDataset(
                        folder.replace('fusion', 'fusion') + '/VIS'),
                    FolderFusionDataset(
                        folder.replace('fusion', 'fusion') + '/IR'),
                ],
                                   roll=0 if predict == 'vis' else 1),
            ) for folder in folders[:1]), da)

    loader_train = DataLoader(dataset=ds_train,
                              num_workers=2,
                              batch_size=batchsize,
                              shuffle=True)
    loader_val = DataLoader(dataset=ds_val,
                            num_workers=2,
                            batch_size=1,
                            shuffle=False)

    criterion = nn.MSELoss()
    criterion.cuda()

    optimizer = optim.Adam(net.parameters(),
                           lr=lr,
                           weight_decay=1e-5,
                           amsgrad=False,
                           eps=1e-8,
                           betas=(0.9, 0.999))
    # LR is reduced when the validation loss (passed in scheduler.step below)
    # plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           verbose=True)

    def do_batch(noisy, target, train=True):
        # One forward (and optionally backward) pass on a single batch.
        if train:
            optimizer.zero_grad()
        denoised = net(noisy.to(device))
        loss = criterion(denoised, target.to(device))
        if train:
            loss.backward()
            optimizer.step()
            # if scheduler:
            # scheduler.step()
        return denoised, loss

    from progressbar import progressbar as pb
    for epoch in range(nbepochs):
        net.train()
        for i, (noisy, target) in pb(enumerate(loader_train)):
            denoised, loss = do_batch(noisy, target)
            # Periodically dump sample tensors for visual inspection.
            if i % 100 == 0:
                write_tensor('noisy.tif', noisy[0:1, ...])
                write_tensor('target.tif', target[0:1, ...])
                write_tensor('denoised.tif', denoised[0:1, ...])
            del loss
            del denoised

        # Validation: only the first 4 batches are evaluated and saved.
        net.eval()
        with torch.no_grad():
            l = 0
            n = 0
            for i, (noisy, target) in enumerate(loader_val):
                if i > 3: break
                denoised, loss = do_batch(noisy, target, train=False)
                l += loss.item()
                write_tensor(f'{saveto}/val_{epoch}_{i}_noisy.tif', noisy[0:1,
                                                                          ...])
                write_tensor(f'{saveto}/val_{epoch}_{i}_target.tif',
                             target[0:1, ...])
                write_tensor(f'{saveto}/val_{epoch}_{i}_denoised.tif',
                             denoised[0:1, ...])
                n += 1
            print('val:', epoch, l / n)
        scheduler.step(l)

        if saveto:
            torch.save([net, optimizer], f'{saveto}/checkpoint_{epoch}.tar')
Ejemplo n.º 32
0
import so_textprocessing as stp
from progressbar import progressbar as pb
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

#%% Path to directory containing 'data' folder, containing NVD json data
path = "/home/david/Documents/misc/FirebaseApp/cvss_prediction/"

#%% Accumulator for one record per CVE, filled by the loop below.
rows = []

#%% Read All Data
for year in pb(range(2002, 2019)):
    CVE = pd.read_json(path + "data/nvdcve-1.1-{}.json".format(year))
    for row in CVE.CVE_Items:
        cve = dict()

        # Get Data
        cve_id = row["cve"]["CVE_data_meta"]["ID"]
        description = row["cve"]["description"]["description_data"]
        impact = row["impact"]

        # Define columns
        cve["description"] = dict()
        cve["impact"] = dict()

        # Assign Information
        cve["description"][cve_id] = description