Example #1
0
def main():
    '''
    Run with at least:
    1 argument, defining the number of minima to be found

    optional last argument is file name of data to read into memory (if it hasn't already been read)
    '''
    # The docstring above doubles as the usage text printed below.
    global np_init_array

    # Show usage both on explicit request and when no argument was supplied
    # (previously a bare invocation died with IndexError on sys.argv[1]).
    if len(sys.argv) < 2 or sys.argv[1] == 'help':
        print(main.__doc__)
        return

    srch_val = int(sys.argv[1])

    # np_init_array lives at module level so that repeated calls within the
    # same interpreter session do not re-read the data file.
    try:
        np_init_array
    except NameError:
        flat.print_log_msg('Reading data')
        # NOTE(review): when only one argument is given, the "last argument"
        # is the minima count itself and is passed on as the file name.
        np_init_array, _ = rd.read_data(sys.argv[-1])

    found_width = custom_binary_search_with_trackback(
        np_init_array,
        filt.apply_filter_get_minima,
        srch_val,
        trackback_delta=200,
        trackback_step=20,
        init_search_location=1000)
    print('found_width: ', found_width)
    flat.print_log_msg('Done')
Example #2
0
def trackback(wrapper, srch_val, start_search, delta_coarse, step_coarse, step_fine=1):
    '''
    Push a found position forward while further matches of srch_val lie nearby.

    Starting at start_search, positions start+step, start+2*step, ... below
    start+delta are probed; the first one where wrapper[i] == srch_val becomes
    the new start and probing restarts from there. This is done first with
    (delta_coarse, step_coarse) and then, unless step_fine is not positive,
    with (step_coarse, step_fine).

    Returns the last matching position reached (start_search itself if no
    further match was found).

    Raises Exception if step_fine > step_coarse; note that this is only
    detected after the coarse pass has already run.
    '''

    def _advance(position, window, stride):
        # Keep hopping to the next match inside the window until a whole
        # window is scanned without finding one.
        while True:
            hit = None
            for idx in range(position + stride, position + window, stride):
                if idx >= len(wrapper):
                    break
                if wrapper[idx] == srch_val:
                    hit = idx
                    break
            if hit is None:
                return position
            position = hit

    flat.print_log_msg('Starting coarse search')
    start_search = _advance(start_search, delta_coarse, step_coarse)

    # The fine-grained pass is on by default (step_fine=1); a step_fine of 0
    # (or anything not greater than 0) disables it.
    if not step_fine > 0:
        return start_search

    if step_fine > step_coarse:
        raise Exception('Error: step_fine is greater than step_coarse')

    flat.print_log_msg('Starting fine search')
    # The fine window is exactly one coarse step wide.
    return _advance(start_search, step_coarse, step_fine)
def main():
    '''
    Compare the metric of a fixed chr21 breakpoint set against evenly spaced
    breakpoints covering the same [begin, end] range.

    For each breakpoint set the raw metric dict and its 'sum' / 'N_zero'
    ratio are printed.
    '''
    begin = 9411243
    end = 48119216

    breakpoints1 = [
        10148322, 15250019, 15864313, 16491839, 17748811, 18252127, 18912106,
        19637870, 20332293, 20929869, 21190923, 21649595, 22318833, 23231365,
        24271200, 24774771, 25035980, 26088085, 27431612, 27666047, 28290149,
        28485200, 28761470, 29335757, 29790442, 30972911, 32778127, 33370496,
        34413058, 35253882, 35614394, 36328018, 37283402, 38078491, 39227880,
        39908770, 40259482, 40965403, 41448115, 41676786, 42689700, 43100808,
        43345207, 43799567, 44748107, 45265729, 45789905, 46336509, 46883153,
        47465743,
    ]

    def _report(breakpoints):
        # Evaluate the metric for one breakpoint set and print the result
        # followed by its normalised value.
        conf = cnst.return_conf('/nethome/jkpickrell/1kG_data/covariance_matrix/')
        result = Metric('chr21', conf, breakpoints, begin, end).calc_metric()
        print(result)
        print(result['sum'] / result['N_zero'])

    _report(breakpoints1)

    # Uniform grid from begin to end whose spacing is derived from the number
    # of gaps in the first breakpoint set.
    spacing = int((end - begin) / (len(breakpoints1) - 1))
    breakpoints2 = list(range(begin, end + 1, spacing))

    _report(breakpoints2)

    flat.print_log_msg('Done')
Example #4
0
def run_local_search_single(chr_name, breakpoint_loci, locus_index, start,
                            stop, total_sum, total_N, input_config,
                            metric_out):
    '''
    Run one local search for the breakpoint at breakpoint_loci[locus_index]
    within [start, stop].

    Returns (new_breakpoint, new_metric) from LocalSearch.search(). If
    anything in the search raises, the error is logged and the original
    breakpoint is returned together with None as the metric, so that a
    surrounding batch run can carry on.
    '''
    print("---- Running local search single")
    try:
        print("----", start, stop, locus_index, breakpoint_loci, total_sum,
              total_N)
        searcher = local_search.LocalSearch(chr_name, start, stop, locus_index,
                                            breakpoint_loci, total_sum,
                                            total_N, input_config)
        new_breakpoint, new_metric = searcher.search()

        # Log the candidate next to the breakpoint it would replace.
        original_breakpoint = breakpoint_loci[locus_index]
        print_breakpoint_comparison(new_breakpoint, new_metric,
                                    original_breakpoint, metric_out)
    except Exception as err:
        # Best effort: report and fall back to the unchanged breakpoint.
        flat.print_log_msg('Error!')
        flat.print_log_msg(str(err))
        flat.print_log_msg('start: %r' % (start,))
        flat.print_log_msg('stop: %r' % (stop,))
        flat.print_log_msg('Continuing...')
        return breakpoint_loci[locus_index], None
    else:
        return new_breakpoint, new_metric
Example #5
0
def custom_binary_search_with_trackback(np_init_array,
                                        f,
                                        srch_val,
                                        trackback_delta=200,
                                        trackback_step=20,
                                        init_search_location=1000):
    '''
    Search for the width at which f applied to np_init_array gives srch_val.

    Three stages: find_end() picks an upper bound end_v, a binary search
    (binsrch.find_le_ind) runs over a FlexibleBoundedAccessor spanning
    [0, end_v], and trackback() then extends the hit over nearby positions
    that also equal srch_val. Accessor indices are converted back to widths
    as end_v - index.

    Returns the width after trackback.
    '''
    flat.print_log_msg('Starting custom_binary_search_with_trackback')

    # One-sided binary (i.e., exponential) search first:
    # apply f to np_init_array and check if init_search_location is smaller
    # than srch_val; if not, double init search val and try again.
    print('search_val: ', srch_val)
    end_v = find_end(np_init_array, f, init_search_location, srch_val)
    print('end_v: ', end_v)
    accessor = FlexibleBoundedAccessor(np_init_array, f, 0, end_v, True)

    # Search with deferred detection of equality
    raw_index = binsrch.find_le_ind(accessor, srch_val)
    print('found_width_raw: ', raw_index)
    print('found_width: ', end_v - raw_index)

    # Find any remaining "noisy" minima
    refined_index = trackback(accessor, srch_val, raw_index, trackback_delta,
                              trackback_step)
    print("found_width_trackback_raw", refined_index)

    # Final result
    found_width = end_v - refined_index
    print('found_width final: ', found_width)
    return found_width
def pipeline_lean(dataset_path,
                  name,
                  out_fname,
                  begin=-1,
                  end=-1,
                  img='no',
                  orient='diag',
                  red='sum',
                  dataset_name='NONAME'):
    '''
    pipeline_lean(dataset_path, name, out_fname, begin=-1, end=-1, img='no', orient='diag', red='sum', dataset_name='NONAME')

    Run the matrix-to-vector analysis in "lean" mode.

    img:    'yes' or 'no' - whether an image is generated afterwards.
    orient: 'vert' (calc_vert) or 'diag' (calc_diag_lean, which is handed
            out_fname and the configured output delimiter).
    red:    only 'sum' is accepted; 'avg' is rejected on purpose.
    dataset_name is accepted for interface compatibility and is not used.

    Raises Exception for any unknown img / orient / red value and for
    red='avg'. All options are checked before any data is read, so a bad
    option no longer costs a full calculation.
    '''
    # --- validate options up front (same order and messages as before) ---
    if img == 'yes':
        generate_img = True
    elif img == 'no':
        generate_img = False
    else:
        raise Exception('Error: Unknown argument: ' + img)

    if orient not in ('vert', 'diag'):
        raise Exception('Error: Unknown argument: ' + orient)

    if red == 'avg':
        raise Exception(
            'Average used, but its output is not always consistent - especially for diag!'
        )
    elif red != 'sum':
        raise Exception('Error: Unknown argument: ' + red)

    # --- run the analysis ---
    analysis = matrix_to_vector.MatrixAnalysis(name,
                                               cnst.return_conf(dataset_path),
                                               begin, end)

    print(analysis.snp_first)
    print(analysis.snp_last)

    flat.print_log_msg('out_fname: ' + out_fname)

    # The calc_* methods receive "dynamic delete" = not generate_img.
    if orient == 'vert':
        analysis.calc_vert(not generate_img)
    else:
        analysis.calc_diag_lean(out_fname, cnst.const['out_delim'],
                                not generate_img)

    # No separate output step here: calc_diag_lean is given out_fname and the
    # output is done step-by-step during the calculation.
    if generate_img:
        analysis.generate_img(out_fname + cnst.const['img_out_ext'])

    flat.print_log_msg('Done')
Example #7
0
def standard_run(np_init_array, np_init_array_x, start, stop, step):
    '''
    Apply the filter series described by (start, stop, step) to np_init_array,
    log the minima indices and loci of every resulting graph, then plot them.
    '''
    # Interactive plots
    graphs = filt.apply_filters(np_init_array, start, stop, step)

    for graph in graphs:
        flat.print_log_msg(
            'indices%r%r' % (graph['width'], graph['filtered_minima_ind']))
        minima_loci = filt.get_minima_loc(graph, np_init_array_x)
        flat.print_log_msg('loci%r%r' % (graph['width'], minima_loci))

    plot_all(np_init_array, graphs, np_init_array_x)
Example #8
0
def print_breakpoint_comparison(breakpoint1, metric1, breakpoint2, metric2):
    '''
    Log two breakpoints one after the other, each followed by its metric
    (printed through print_metric).
    '''
    entries = (
        ('Breakpoint 1: ', 'Metric 1:', breakpoint1, metric1),
        ('Breakpoint 2: ', 'Metric 2:', breakpoint2, metric2),
    )
    for breakpoint_label, metric_label, breakpoint, metric in entries:
        flat.print_log_msg(breakpoint_label + repr(breakpoint))
        flat.print_log_msg(metric_label)
        print_metric(metric)
Example #9
0
def main():
    '''
    Run with at least:
    a) 1 argument, defining the central filter width of analysis area
    b) 3 arguments, defining the start, stop, and step of series of filter widths

    optional last argument is file name of data to read into memory (if it hasn't already been read)
    '''
    # The docstring above doubles as the usage text printed below.
    global np_init_array, np_init_array_x

    # Show usage both on explicit request and when no argument was supplied
    # (previously a bare invocation died with IndexError on sys.argv[1]).
    if len(sys.argv) < 2 or sys.argv[1] == 'help':
        print(main.__doc__)
        return

    # The data arrays live at module level so that repeated calls within the
    # same interpreter session do not re-read the data file.
    try:
        np_init_array
        np_init_array_x
    except NameError:
        flat.print_log_msg('Reading data')
        np_init_array, np_init_array_x = rd.read_data(sys.argv[-1])

    relative_width = 0.5

    if len(sys.argv) > 3:
        # Explicit series: start, stop, step.
        start = int(sys.argv[1])
        stop = int(sys.argv[2])
        step = int(sys.argv[3])
    else:
        # Single centre value: cover +/- relative_width around it and derive
        # the step as one sixth of that span.
        center_val = int(sys.argv[1])
        start = math.floor(center_val - relative_width * center_val)
        stop = math.ceil(center_val + relative_width * center_val)
        step = math.floor(2 * relative_width * center_val / 6)

    standard_run(np_init_array, np_init_array_x, start, stop, step)

    flat.print_log_msg('Done')
Example #10
0
    def write_output_to_file(self, filename, out_delim, avg=False):
        '''
        Hand the accumulated results to flat.write_output.

        Passes locus_list, locus_list_deleted and vert_sum together with
        filename and out_delim; when avg is truthy, vert_sum_len is passed as
        an additional trailing argument.

        Raises Exception if the calculation has not been completed yet.
        '''
        if not self.calculation_complete:
            raise Exception(
                'Error: Calculation has not been completed prior to output file generation'
            )

        flat.print_log_msg('Writing output to file')

        output_args = [
            filename,
            self.locus_list,
            self.locus_list_deleted,
            self.vert_sum,
            out_delim,
        ]
        if avg:
            output_args.append(self.vert_sum_len)

        flat.write_output(*output_args)
Example #11
0
def run_local_search_complete(chr_name, breakpoint_loci, begin, end, input_config, metric_out):
    '''
    Run a local search around every breakpoint in breakpoint_loci.

    The search window of a breakpoint stretches from the midpoint towards its
    left neighbour to the midpoint towards its right neighbour; the first
    window starts at begin and the last one stops at end. Neighbouring
    windows share their boundary value (the original note says the overlap
    is taken care of in local search).

    A single breakpoint is searched over [begin, end]; an empty
    breakpoint_loci yields empty result lists (both cases used to fail with
    IndexError).

    Returns a dict {'loci': [...], 'metrics': [...]} with one entry per
    breakpoint, in order. The metric entry is whatever
    run_local_search_single returned for that breakpoint.
    '''
    total_sum = metric_out['sum']
    total_N = metric_out['N_zero']

    new_loci = []
    new_metrics = []
    last_index = len(breakpoint_loci) - 1

    for locus_index in range(len(breakpoint_loci)):
        # Left edge: begin for the first breakpoint, otherwise the midpoint
        # towards the previous breakpoint.
        if locus_index == 0:
            b_start = begin
        else:
            b_start = int(midpoint(breakpoint_loci[locus_index - 1],
                                   breakpoint_loci[locus_index]))

        # Right edge: end for the last breakpoint, otherwise the midpoint
        # towards the next breakpoint.
        if locus_index == last_index:
            b_stop = end
        else:
            b_stop = int(midpoint(breakpoint_loci[locus_index],
                                  breakpoint_loci[locus_index + 1]))

        new_breakpoint, new_metric = run_local_search_single(
            chr_name, breakpoint_loci, locus_index, b_start, b_stop,
            total_sum, total_N, input_config, metric_out)
        new_loci.append(new_breakpoint)
        new_metrics.append(new_metric)

    breakpoint_loci_local_search = {'loci': new_loci, 'metrics': new_metrics}

    flat.print_log_msg('New breakpoints:')
    print(breakpoint_loci_local_search)

    return breakpoint_loci_local_search
def main():
    '''
    Read the chr1 hotspot data and save two plots of it: the full series
    ('genetic_maps_output.png') and the slice [5000:10000]
    ('genetic_maps_zoomed_output.png').
    '''
    name = 'chr1'

    flat.print_log_msg('Starting run')
    maps_conf = cnst.const['genetic_maps']
    map_path = maps_conf['root'] + maps_conf['file_base'] + name + maps_conf['ext']
    x, y, _pairs = flat.read_hotspots(map_path)

    def _render(xs, ys, title, out_file):
        # Draw one labelled 40x30 inch figure and write it to out_file.
        pt.plot(xs, ys)
        pt.gcf().set_size_inches((40, 30))

        pt.xlabel('SNP #')
        pt.ylabel('Hotspot val')
        pt.title(title)

        pt.savefig(out_file)

    flat.print_log_msg('Plotting')
    _render(x, y, 'Hotspots', 'genetic_maps_output.png')

    # Clear the figure before drawing the zoomed view.
    pt.clf()
    _render(x[5000:10000], y[5000:10000], 'Hotspots zoomed',
            'genetic_maps_zoomed_output.png')

    flat.print_log_msg('Done')
Example #13
0
    def calc_diag_lean(self, out_fname, out_delim, dynamic_delete=True):
        '''
        Accumulate correlation coefficients along the anti-diagonals of the
        matrix, partition by partition, in "lean" mode.

        For every locus between snp_first and the end locus of the current
        partition, x walks towards lower and y towards higher indices of
        locus_list (starting with x == y == the locus) while both stay inside
        the partition bounds. For each visited (x, y) present in self.matrix,
        matrix[x][y] / sqrt(matrix[x][x] * matrix[y][y]) is handed to
        self.add_corr_coeff for the current locus.

        After each partition, flat.delete_loci_smaller_than_lean is called
        with out_fname, vert_sum and out_delim (presumably this is where
        finished loci are written out - confirm in flat).

        On success start_locus, start_locus_index, end_locus, end_locus_index
        and calculation_complete are set on self.

        Raises Exception if dynamic_delete is False or if locus_list is empty
        when a start locus has to be determined.
        '''
        if dynamic_delete == False:
            raise Exception(
                'Error: Conversion has been run in lean mode, but with dynamically=False.'
            )

        self.dynamic_delete = dynamic_delete

        flat.print_log_msg('Start')

        # pre-read all relevant partitions at beginning!
        last_p_num = -1
        for p_num_init in range(0, len(self.partitions) - 1):
            if self.snp_first >= self.partitions[p_num_init + 1][0]:
                flat.print_log_msg('Pre-reading partition: ' +
                                   str(self.partitions[p_num_init]))
                flat.read_partition_into_matrix_lean(
                    self.partitions, p_num_init, self.matrix, self.locus_list,
                    self.name, self.input_config, self.snp_first,
                    self.snp_last)
                last_p_num = p_num_init
            else:
                break

        # curr_locus < 0 means "start locus not determined yet".
        curr_locus = -1
        for p_num in range(last_p_num + 1, len(self.partitions)):
            p = self.partitions[p_num]

            flat.print_log_msg('Reading partition: ' + str(p))
            flat.read_partition_into_matrix_lean(self.partitions, p_num,
                                                 self.matrix, self.locus_list,
                                                 self.name, self.input_config,
                                                 self.snp_first, self.snp_last)

            # Determine first locus
            if curr_locus < 0:  # Either first partition or not found in first partition
                if len(self.locus_list) > 0:
                    # Find first locus >= snp_first
                    for i, locus in enumerate(self.locus_list):
                        if locus >= self.snp_first:
                            curr_locus = locus
                            start_locus = locus
                            curr_locus_index = i
                            start_locus_index = i
                            break
                else:
                    raise Exception('Error: locus_list seems to be empty')
            else:
                try:
                    # curr_locus is carried from prev iteration, but index has changed
                    # since part of matrix (and locus_list) has been deleted
                    curr_locus_index = self.locus_list.index(curr_locus)
                except ValueError:
                    if len(self.locus_list) > 0:
                        curr_locus = self.locus_list[0]
                        curr_locus_index = 0
                    else:
                        raise Exception('Error: locus_list seems to be empty')

            if curr_locus < 0:
                flat.print_log_msg(
                    'Warning: curr_locus not found! Continuing to next partition.'
                )
                flat.print_log_msg(
                    'Comment: This is possibly due to snp_first being very close to end of partition.'
                )
                flat.print_log_msg('Details: ')
                flat.print_log_msg('Partition: ' + repr(p))
                flat.print_log_msg('snp_first: ' + repr(self.snp_first))
                flat.print_log_msg('curr_locus: ' + repr(curr_locus))
                continue  # continue to next partition

            # Determine end locus
            if p_num + 1 < len(self.partitions):
                # Halfway between the end of this partition and the start of
                # the next one (diag - specific).
                end_locus = int(
                    (self.partitions[p_num][1] + self.partitions[p_num + 1][0])
                    / 2)
                print("1 end locus", end_locus, p_num)
            else:
                # Last partition: find last locus <= snp_last
                end_locus_found = False
                for i in reversed(range(0, len(self.locus_list))):
                    if self.locus_list[i] <= self.snp_last:
                        end_locus = self.locus_list[i]
                        end_locus_index = i
                        end_locus_found = True
                        break

                if not end_locus_found:
                    end_locus_index = 0
                    end_locus = self.locus_list[end_locus_index]

                # Printed only after the fallback above, so end_locus is
                # always bound and reflects the value actually used.
                print("2 end locus", end_locus, p_num)

            flat.print_log_msg('Running for partition: ' + str(p))
            # This will not include the very last SNP of the complete range, but that shouldn't be too important since the end of the range shouldn't be a defining location for LD
            while curr_locus <= end_locus:
                print("-----" * 5)
                print("curr_locus", curr_locus)
                total_iterations = 0
                total_additions = 0
                x = self.locus_list[curr_locus_index]
                y = self.locus_list[curr_locus_index]
                print("x_idx", curr_locus_index)
                print("y_idx", curr_locus_index)
                print("x", x)
                print("y", y)
                delta = 0

                while (x >= self.partitions[p_num][0]
                       and y <= self.partitions[p_num][1]):
                    print("  x", x)
                    print("  y", y)
                    # Symmetric pair: x and y are both delta steps away from
                    # the current locus.
                    if x in self.matrix and y in self.matrix[x]:
                        corr_coeff = self.matrix[x][y] / math.sqrt(
                            self.matrix[x][x] * self.matrix[y][y])
                        self.add_corr_coeff(corr_coeff, curr_locus)

                        print("  self.vert_sum[curr_locus]",
                              self.vert_sum[curr_locus])
                        total_additions += 1

                    if delta != 0:
                        # Neighbouring pair: x one step closer to the current
                        # locus than y.
                        x = self.locus_list[curr_locus_index - delta + 1]
                        if x in self.matrix and y in self.matrix[x]:
                            corr_coeff = self.matrix[x][y] / math.sqrt(
                                self.matrix[x][x] * self.matrix[y][y])
                            self.add_corr_coeff(corr_coeff, curr_locus)
                            print("  self.vert_sum[curr_locus]",
                                  self.vert_sum[curr_locus])

                            total_additions += 1

                    delta += 1
                    if curr_locus_index - delta >= 0:
                        print("x_idx", curr_locus_index - delta)
                        x = self.locus_list[curr_locus_index - delta]
                    else:
                        flat.print_log_msg('X index out of bounds')
                        break

                    if curr_locus_index + delta < len(self.locus_list):
                        print("y_idx", curr_locus_index + delta)
                        y = self.locus_list[curr_locus_index + delta]
                    else:
                        flat.print_log_msg('Y index out of bounds')
                        break

                    total_iterations += 1
                print("total_iterations", total_iterations)
                print("total_additions", total_additions)

                if curr_locus_index + 1 < len(self.locus_list):
                    curr_locus_index += 1
                    curr_locus = self.locus_list[curr_locus_index]
                else:
                    flat.print_log_msg('curr_locus_index out of bounds')
                    break

            if self.dynamic_delete:
                flat.print_log_msg('Deleting loci not required any more')
                if p_num + 1 < len(self.partitions):
                    delete_loc = self.partitions[p_num + 1][0]
                else:
                    delete_loc = end_locus

                flat.delete_loci_smaller_than_lean(delete_loc, self.matrix,
                                                   self.locus_list,
                                                   self.locus_list_deleted,
                                                   out_fname, self.vert_sum,
                                                   out_delim)
            else:
                flat.print_log_msg('locus_list size: ' +
                                   repr(len(self.locus_list)))

        # NOTE(review): if no start locus was ever found (every partition hit
        # the "continue" above) these names are unbound and the assignments
        # below raise UnboundLocalError - confirm whether that case can occur.
        self.start_locus = start_locus
        self.start_locus_index = start_locus_index
        self.end_locus = end_locus
        self.end_locus_index = end_locus_index

        self.calculation_complete = True
def pipeline(dataset_path,
             name,
             out_fname,
             begin=-1,
             end=-1,
             img='no',
             orient='diag',
             red='sum',
             snp=None,
             comment='',
             dataset_name='NONAME'):
    '''
    pipeline(dataset_path, name, out_fname, begin=-1, end=-1, img='no', orient='diag', red='sum', snp=None, comment='', dataset_name='NONAME')

    Run the matrix-to-vector analysis and write the result to out_fname.

    img:    'yes' or 'no' - whether an image ('img-' + out_fname + configured
            extension) is generated afterwards.
    orient: 'vert' (calc_vert) or 'diag' (calc_diag).
    red:    only 'sum' is accepted; 'avg' is rejected on purpose.
    snp:    if not None, passed on to generate_img. Per the original note
            these are loci of two SNPs that need to be converted into ordinal
            numbers representing row/col in the image of the matrix.
    comment and dataset_name are accepted for interface compatibility and
    are not used.

    Raises Exception for any unknown img / orient / red value and for
    red='avg'. All options are checked before any data is read, so a bad
    option no longer costs a full calculation.
    '''
    # --- validate options up front (same order and messages as before) ---
    if img == 'yes':
        generate_img = True
    elif img == 'no':
        generate_img = False
    else:
        raise Exception('Error: Unknown argument: ' + img)

    if orient not in ('vert', 'diag'):
        raise Exception('Error: Unknown argument: ' + orient)

    if red == 'avg':
        raise Exception(
            'Average used, but its output is not always consistent - especially for diag!'
        )
    elif red != 'sum':
        raise Exception('Error: Unknown argument: ' + red)

    # Only 'sum' gets this far, so averaging is always off.
    avg = False

    # --- run the analysis ---
    analysis = matrix_to_vector.MatrixAnalysis(name,
                                               cnst.return_conf(dataset_path),
                                               begin, end)

    print(analysis.snp_first)
    print(analysis.snp_last)

    # The calc_* methods receive "dynamic delete" = not generate_img.
    if orient == 'vert':
        analysis.calc_vert(not generate_img)
    else:
        analysis.calc_diag(not generate_img)

    analysis.write_output_to_file(out_fname, cnst.const['out_delim'], avg)

    if generate_img:
        img_fname = 'img-' + out_fname + cnst.const['img_out_ext']
        if snp is not None:
            analysis.generate_img(img_fname, snp)
        else:
            analysis.generate_img(img_fname)

    flat.print_log_msg('Done')
    def calc_metric_full(self):
        # flat.print_log_msg('Removing existing matrix output file')
        # try:
        #     os.remove(cnst.const['out_matrix_delim'])
        # except OSError:
        #     pass
        
        if not self.dynamic_delete:
            raise Exception('Error: dynamic delete must be True for metric calculation!')

        flat.print_log_msg('Start metric')
        
        curr_breakpoint_index = 0
        block_height = 0
        block_width = 0
        
        total_N_SNPs = decimal.Decimal('0')
        block_width_sum = decimal.Decimal('0')

        # pre-read all relevant partitions at beginning!
        last_p_num = -1
        for p_num_init in range(0, len(self.partitions)-1):
            if self.snp_first >= self.partitions[p_num_init+1][0]:
                flat.print_log_msg('Pre-reading partition: '+str(self.partitions[p_num_init])) 
                flat.read_partition_into_matrix(self.partitions, p_num_init, self.matrix, self.locus_list, self.name, self.input_config, self.snp_first, self.snp_last)
                last_p_num = p_num_init
            else:
                break

        curr_locus = -1
        # for p_num, p in enumerate(self.partitions):
        for p_num in range(last_p_num+1, len(self.partitions)):
            p = self.partitions[p_num]

            flat.print_log_msg('Reading partition: '+str(p))
            flat.read_partition_into_matrix(self.partitions, p_num, self.matrix, self.locus_list, self.name, self.input_config, self.snp_first, self.snp_last)

            # Determine first locus
            if curr_locus<0: # Either first partition or not found in first partition
                # curr_locus = -1 # <- this should have been set to -1 before entering the main for loop
                if len(self.locus_list)>0:
                    # Find first locus >= snp_first
                    for i, locus in enumerate(self.locus_list):
                        if locus >= self.snp_first:
                            curr_locus = locus
                            start_locus = locus
                            curr_locus_index = i
                            start_locus_index = i
                            break
                else:
                    raise Exception('Error: locus_list seems to be empty') 
            # else:
            #   if len(self.locus_list)>0:
            #       curr_locus = self.locus_list[0]
            #       curr_locus_index = 0
            #   else:
            #       raise Exception('Error: locus_list seems to be empty')
            else:
                try:
                    curr_locus_index = self.locus_list.index(curr_locus)
                    # curr_locus is carried from prev iteration, but index has changed since part of matrix (and locus_list) has been deleted
                except ValueError:
                    if len(self.locus_list)>0:
                        curr_locus = self.locus_list[0]
                        curr_locus_index = 0
                    else:
                        raise Exception('Error: locus_list seems to be empty')

            if curr_locus<0:
                flat.print_log_msg('Warning: curr_locus not found! Continuing to next partition.')
                flat.print_log_msg('Comment: This is possibly due to snp_first being very close to end of partition.')
                flat.print_log_msg('Details: ')
                flat.print_log_msg('Partition: '+repr(p))
                flat.print_log_msg('snp_first: '+repr(self.snp_first))
                flat.print_log_msg('curr_locus: '+repr(curr_locus)) 
                continue #continue to next partition 
                # raise Exception('Error: curr_locus not found!')   
            
            # Determine last locus
            if p_num+1 < len(self.partitions):
                end_locus = self.partitions[p_num+1][0]
                end_locus_index = -1
            else:
                # end_locus = self.partitions[p_num][1]

                # Find last locus <= snp_last
                end_locus_found = False
                for i in reversed(range(0, len(self.locus_list))):
                # for locus in reversed(locus_list):
                    if self.locus_list[i] <= self.snp_last:
                        end_locus = self.locus_list[i]
                        end_locus_index = i
                        end_locus_found = True
                        break

                if not end_locus_found:
                    end_locus_index = 0
                    end_locus = self.locus_list[end_locus_index]
            
            flat.print_log_msg('Running metric for partition: '+str(p))
            # This will not include the very last SNP of the complete range, but that shouldn't be too important since the end of the range shouldn't be a defining location for LD
            while curr_locus <= end_locus:
                if  curr_breakpoint_index<len(self.breakpoints): 
                    if curr_locus > self.breakpoints[curr_breakpoint_index]: # Breakpoint is the last element of the block!
#                         block_height =  len(self.locus_list) - curr_locus_index
                        block_height =  0 - total_N_SNPs # - 1 # ? # this is in accordance with the formula for deferred sum calculation 
                        self.metric['N_zero'] += block_height * block_width
                        block_width_sum += block_width
                        
                        curr_breakpoint_index += 1
                        block_width = 0
                
                if  curr_breakpoint_index>=len(self.breakpoints):
                    break
                
#                 found = False
                try:
                    for key, el in self.matrix[curr_locus]['data'].items():
                        if key > self.breakpoints[curr_breakpoint_index]: # Only add those above the breakpoint!
                            corr_coeff = self.matrix[curr_locus]['data'][key]['shrink'] / math.sqrt( self.matrix[curr_locus]['data'][curr_locus]['shrink'] * self.matrix[key]['data'][key]['shrink'] )
                            self.metric['sum'] += decimal.Decimal(corr_coeff**2)
                            self.metric['N_nonzero'] += 1
#                             found = True
                except IndexError as e:
                    print('Error!')
                    print(e)
                    print(key, el)
                    print(curr_locus)
                    print(self.matrix)
                    print(self.breakpoints)
                    print(curr_breakpoint_index)
                    
#                 if found:
                block_width += 1 # block_width needs to be increased even if it doesn't have values in the outer part of the matrix! 
                    
                if curr_locus_index+1 < len(self.locus_list):
                    curr_locus_index+=1
                    curr_locus = self.locus_list[curr_locus_index]
                    total_N_SNPs += 1
                else:
                    flat.print_log_msg('curr_locus_index out of bounds')
                    break

#             if block_width > 0: # If an LD block hasn't finished, but a new partition must be read into memory
# #                 index_of_breakpoint_in_locus_list = -1
#                 for ind in range(curr_locus_index, len(self.locus_list)):
#                     if self.locus_list[ind] >= self.breakpoints[curr_breakpoint_index]:
# #                         index_of_breakpoint_in_locus_list = ind
#                         break
#                 
#                 num_of_SNPs_to_add = ind - curr_locus_index
#                 
# #                 if index_of_breakpoint_in_locus_list < 0:
# #                     raise Exception('Error: index_of_breakpoint_in_locus_list not found!')
#                 
# #                 block_height =  len(self.locus_list) - index_of_breakpoint_in_locus_list
#                 block_height =  0 - (total_N_SNPs+num_of_SNPs_to_add)
#                 self.metric['N_zero'] += block_height * block_width
#                 
#                 block_width_sum += block_width
#                 block_width = 0
                
            # flat.delete_loci_smaller_than_and_output_matrix_to_file(end_locus, self.matrix, locus_list, locus_list_deleted, cnst.const['out_matrix_filename'])
            if self.dynamic_delete:
                flat.print_log_msg('Deleting loci not required any more')
                flat.delete_loci_smaller_than(end_locus, self.matrix, self.locus_list, self.locus_list_deleted)

        self.start_locus = start_locus
        self.start_locus_index = start_locus_index
        self.end_locus = end_locus
        self.end_locus_index = end_locus_index
        
        self.metric['N_zero'] += total_N_SNPs * block_width_sum # this is in accordance with the formula for deferred sum calculation
        
        print('total_N_SNPs, block_width', total_N_SNPs, block_width)
        print('total_N_SNPs-block_width', total_N_SNPs-block_width)
        print('block_width_sum', block_width_sum)
        
        self.calculation_complete = True
        
        return self.metric
Example #16
0
def pipeline(input_fname,
             chr_name,
             dataset_path,
             n_snps_bw_bpoints,
             out_fname,
             begin=-1,
             end=-1,
             trackback_delta=200,
             trackback_step=20,
             init_search_location=1000):
    """Run the breakpoint-detection pipeline end to end and pickle the results.

    Steps, in order:
      1. Read the input vector and clip it to the ``[begin, end]`` range.
      2. Derive the target number of breakpoints from ``n_snps_bw_bpoints``.
      3. Search for the filter width that yields that many minima.
      4. Apply the filter and take the minima locations as breakpoints.
      5. Compute the metric for those breakpoints and for an equally sized set
         of uniformly spaced breakpoints.
      6. Run the local search on both sets and recompute the metric for each.
      7. Dump everything into a pickle at ``out_fname``.

    Args:
        input_fname: Path handed to ``rd.read_data_raw``.
        chr_name: Chromosome name forwarded to ``flat.first_last``,
            ``apply_metric`` and ``run_local_search_complete``.
        dataset_path: Handed to ``cnst.return_conf`` to obtain the config.
        n_snps_bw_bpoints: Divisor used to turn the number of data points in
            range into the number of breakpoints.
        out_fname: Path of the pickle file that is written at the end.
        begin: Lower bound passed to ``flat.first_last``; the value it returns
            is the one actually used.
        end: Upper bound passed to ``flat.first_last``; the value it returns
            is the one actually used.
        trackback_delta: Forwarded to
            ``find_minima.custom_binary_search_with_trackback``.
        trackback_step: Forwarded to the same search.
        init_search_location: Forwarded to the same search.

    Returns:
        None. The results are written to ``out_fname``.
    """
    config = cnst.return_conf(dataset_path)
    # Original author's note: first_last just reads the first and last
    # position in the partitions.
    begin, end = flat.first_last(chr_name, config, begin, end)

    # READ DATA
    flat.print_log_msg('* Reading data')

    # Two parallel lists; init_array_x is the one searched by position below.
    init_array, init_array_x = rd.read_data_raw(input_fname)

    # Clip the input data to the required range and convert to numpy arrays.
    # Original author's note: these are just a bisect-left and a bisect-right.
    begin_ind = binsrch.find_ge_ind(init_array_x, begin)
    end_ind = binsrch.find_le_ind(init_array_x, end)
    np_init_array = np.array(init_array[begin_ind:(end_ind + 1)])
    np_init_array_x = np.array(init_array_x[begin_ind:(end_ind + 1)])

    # DETERMINE NUMBER OF BREAKPOINTS
    n_bpoints = int(math.ceil(len(np_init_array_x) / n_snps_bw_bpoints - 1))

    # SEARCH FOR FILTER WIDTH
    found_width = find_minima.custom_binary_search_with_trackback(
        np_init_array,
        filt.apply_filter_get_minima,
        n_bpoints,
        trackback_delta=trackback_delta,
        trackback_step=trackback_step,
        init_search_location=init_search_location)

    # GET MINIMA LOCATIONS
    flat.print_log_msg('* Applying filter and getting minima locations...')

    # Original author's note: apply_filter just applies a Hanning filter.
    g = filt.apply_filter(np_init_array, found_width)
    breakpoint_loci = filt.get_minima_loc(g, np_init_array_x)

    # METRIC
    flat.print_log_msg(
        '* Calculating metric for non-uniform breakpoints (minima of filtered data)...'
    )
    metric_out = apply_metric(chr_name, begin, end, config, breakpoint_loci)
    flat.print_log_msg('Global metric:')
    print_metric(metric_out)

    # METRIC FOR UNIFORM BREAKPOINTS
    # This section must run: the local search and the pickle below both read
    # breakpoint_loci_uniform and metric_out_uniform.
    flat.print_log_msg('* Calculating metric for uniform breakpoints...')
    # NOTE(review): the spacing is taken over the unclipped init_array_x, as in
    # the previously commented-out code - confirm that the clipped
    # np_init_array_x is not what was intended.
    step = int(len(init_array_x) / (len(breakpoint_loci) + 1))
    breakpoint_loci_uniform = [
        init_array_x[i]
        for i in range(step, len(init_array_x) - step + 1, step)
    ]

    metric_out_uniform = apply_metric(chr_name, begin, end, config,
                                      breakpoint_loci_uniform)
    flat.print_log_msg('Global metric:')
    print_metric(metric_out_uniform)

    # LOCAL SEARCH ON FOURIER - missing N runs
    flat.print_log_msg('* Running local search for fourier...')
    breakpoint_loci_local_search = run_local_search_complete(
        chr_name, breakpoint_loci, begin, end, config, metric_out)

    # RUN METRIC AGAIN W/ NEW BREAKPOINTS FROM FOURIER LOCAL SEARCH
    flat.print_log_msg('* Calculating metric for new fourier breakpoints...')
    metric_out_local_search = apply_metric(
        chr_name, begin, end, config, breakpoint_loci_local_search['loci'])
    flat.print_log_msg('Global metric:')
    print_metric(metric_out_local_search)

    # LOCAL SEARCH ON UNIFORM - missing N runs
    flat.print_log_msg('* Running local search for uniform breakpoints...')
    breakpoint_loci_uniform_local_search = run_local_search_complete(
        chr_name, breakpoint_loci_uniform, begin, end, config,
        metric_out_uniform)

    # RUN METRIC AGAIN W/ NEW BREAKPOINTS FROM UNIFORM
    flat.print_log_msg('* Calculating metric for new uniform breakpoints...')
    metric_out_uniform_local_search = apply_metric(
        chr_name, begin, end, config,
        breakpoint_loci_uniform_local_search['loci'])
    flat.print_log_msg('Global metric:')
    print_metric(metric_out_uniform_local_search)

    # DUMP DATA INTO PICKLE SO IT CAN BE ANALYZED AND LOOKED AT WITHOUT
    # RE-RUNNING EVERYTHING
    pickle_out = {
        'argv': sys.argv,
        'n_bpoints': n_bpoints,
        'found_width': found_width,
        'fourier': {
            'loci': breakpoint_loci,
            'metric': metric_out,
        },
        'uniform': {
            'loci': breakpoint_loci_uniform,
            'metric': metric_out_uniform,
        },
        # The local-search results are stored as returned (the code above reads
        # their 'loci' key); the recomputed metric is added to the same dicts
        # in place just below.
        'fourier_ls': breakpoint_loci_local_search,
        'uniform_ls': breakpoint_loci_uniform_local_search,
    }
    pickle_out['fourier_ls']['metric'] = metric_out_local_search
    pickle_out['uniform_ls']['metric'] = metric_out_uniform_local_search

    with open(out_fname, 'wb') as f_out:
        pickle.dump(pickle_out, f_out)

    flat.print_log_msg('Done')
Example #17
0
    def calc_diag(self, dynamic_delete=True):
        """Walk the matrix partition by partition along anti-diagonals.

        For every locus between the first locus >= ``self.snp_first`` and the
        end locus of each partition, this visits pairs ``(x, y)`` that move
        symmetrically away from the current locus in ``self.locus_list``.
        For each pair present in ``self.matrix`` it computes

            shrink[x][y] / sqrt(shrink[x][x] * shrink[y][y])

        then passes the value to ``self.add_corr_coeff`` together with the
        current locus and stores it under the ``'corr_coeff'`` key of that
        matrix cell.

        On completion it sets ``self.start_locus``, ``self.start_locus_index``,
        ``self.end_locus``, ``self.end_locus_index`` and
        ``self.calculation_complete``.

        Args:
            dynamic_delete: Stored on ``self.dynamic_delete``. When true,
                ``flat.delete_loci_smaller_than`` is called after each
                partition; otherwise only the size of ``locus_list`` is logged.

        Raises:
            Exception: If ``self.locus_list`` is empty when the first locus of
                a partition has to be determined.
        """
        # flat.print_log_msg('Removing existing matrix output file')
        # try:
        #     os.remove(cnst.const['out_matrix_delim'])
        # except OSError:
        #     pass

        self.dynamic_delete = dynamic_delete

        flat.print_log_msg('Start')

        # pre-read all relevant partitions at beginning!
        # A partition is pre-read (without being processed) while snp_first is
        # at or past the start of the partition that follows it.
        last_p_num = -1
        for p_num_init in range(0, len(self.partitions) - 1):
            if self.snp_first >= self.partitions[p_num_init + 1][0]:
                flat.print_log_msg('Pre-reading partition: ' +
                                   str(self.partitions[p_num_init]))
                flat.read_partition_into_matrix(self.partitions, p_num_init,
                                                self.matrix, self.locus_list,
                                                self.name, self.input_config,
                                                self.snp_first, self.snp_last)
                last_p_num = p_num_init
            else:
                break

        # -1 is the "not found yet" sentinel for curr_locus.
        curr_locus = -1
        # for p_num, p in enumerate(self.partitions):
        for p_num in range(last_p_num + 1, len(self.partitions)):
            p = self.partitions[p_num]

            flat.print_log_msg('Reading partition: ' + str(p))
            flat.read_partition_into_matrix(self.partitions, p_num,
                                            self.matrix, self.locus_list,
                                            self.name, self.input_config,
                                            self.snp_first, self.snp_last)

            # Determine first locus
            if curr_locus < 0:  # Either first partition or not found in first partition
                # curr_locus = -1 # <- this should have been set to -1 before entering the main for loop
                if len(self.locus_list) > 0:
                    # Find first locus >= snp_first
                    for i, locus in enumerate(self.locus_list):
                        if locus >= self.snp_first:
                            curr_locus = locus
                            start_locus = locus
                            curr_locus_index = i
                            start_locus_index = i
                            break
                else:
                    raise Exception('Error: locus_list seems to be empty')
            # else:
            #     if len(self.locus_list)>0:
            #         curr_locus = self.locus_list[0]
            #         curr_locus_index = 0
            #     else:
            #         raise Exception('Error: locus_list seems to be empty')
            else:
                try:
                    curr_locus_index = self.locus_list.index(curr_locus)
                    # curr_locus is carried from prev iteration, but index has changed since part of matrix (and locus_list) has been deleted
                except ValueError:
                    # The carried locus is no longer in the list: restart from
                    # the first locus that is still present.
                    if len(self.locus_list) > 0:
                        curr_locus = self.locus_list[0]
                        curr_locus_index = 0
                    else:
                        raise Exception('Error: locus_list seems to be empty')

            if curr_locus < 0:
                flat.print_log_msg(
                    'Warning: curr_locus not found! Continuing to next partition.'
                )
                flat.print_log_msg(
                    'Comment: This is possibly due to snp_first being very close to end of partition.'
                )
                flat.print_log_msg('Details: ')
                flat.print_log_msg('Partition: ' + repr(p))
                flat.print_log_msg('snp_first: ' + repr(self.snp_first))
                flat.print_log_msg('curr_locus: ' + repr(curr_locus))
                continue  #continue to next partition
                # raise Exception('Error: curr_locus not found!')

            # Determine end locus
            if p_num + 1 < len(self.partitions):
                # Not the last partition: stop at the midpoint between the end
                # of this partition and the start of the next one.
                # NOTE(review): end_locus_index is not assigned on this branch.
                end_locus = int(
                    (self.partitions[p_num][1] + self.partitions[p_num + 1][0])
                    / 2)
            else:
                # end_locus = self.partitions[p_num][1]

                # Find last locus <= snp_last
                end_locus_found = False
                for i in reversed(range(0, len(self.locus_list))):
                    # for locus in reversed(locus_list):
                    if self.locus_list[i] <= self.snp_last:
                        end_locus = self.locus_list[i]
                        end_locus_index = i
                        end_locus_found = True
                        break

                if not end_locus_found:
                    # No locus <= snp_last: fall back to the first list entry.
                    end_locus_index = 0
                    end_locus = self.locus_list[end_locus_index]

            flat.print_log_msg('Running for partition: ' + str(p))
            # This will not include the very last SNP of the complete range, but that shouldn't be too important since the end of the range shouldn't be a defining location for LD
            while curr_locus <= end_locus:
                # Start on the diagonal cell of the current locus; x then moves
                # down the list and y moves up the list, one step per round.
                x = self.locus_list[curr_locus_index]
                y = self.locus_list[curr_locus_index]
                delta = 0

                # Keep going while both ends stay inside the bounds of the
                # current partition.
                while x >= self.partitions[p_num][0] and y <= self.partitions[
                        p_num][1]:
                    # Pair at distance delta on both sides of the current locus.
                    if x in self.matrix and y in self.matrix[x]['data']:
                        corr_coeff = self.matrix[x]['data'][y][
                            'shrink'] / math.sqrt(
                                self.matrix[x]['data'][x]['shrink'] *
                                self.matrix[y]['data'][y]['shrink'])
                        self.add_corr_coeff(corr_coeff, curr_locus)

                        # Just save it in the matrix ;) ...for img
                        self.matrix[x]['data'][y]['corr_coeff'] = corr_coeff

                    if delta != 0:
                        # Second pair for this round: same y, but x one list
                        # position closer to the current locus.
                        x = self.locus_list[curr_locus_index - delta + 1]
                        if x in self.matrix and y in self.matrix[x]['data']:
                            corr_coeff = self.matrix[x]['data'][y][
                                'shrink'] / math.sqrt(
                                    self.matrix[x]['data'][x]['shrink'] *
                                    self.matrix[y]['data'][y]['shrink'])
                            self.add_corr_coeff(corr_coeff, curr_locus)

                            # Just save it in the matrix ;) ...for img
                            self.matrix[x]['data'][y][
                                'corr_coeff'] = corr_coeff

                    # Widen the window by one position on each side; stop as
                    # soon as either side runs off the locus list.
                    delta += 1
                    if curr_locus_index - delta >= 0:
                        x = self.locus_list[curr_locus_index - delta]
                    else:
                        # flat.print_log_msg('X index out of bounds')
                        break

                    if curr_locus_index + delta < len(self.locus_list):
                        y = self.locus_list[curr_locus_index + delta]
                    else:
                        # flat.print_log_msg('Y index out of bounds')
                        break

                # Advance to the next locus, or stop this partition when the
                # list is exhausted.
                if curr_locus_index + 1 < len(self.locus_list):
                    curr_locus_index += 1
                    curr_locus = self.locus_list[curr_locus_index]
                else:
                    flat.print_log_msg('curr_locus_index out of bounds')
                    break

            # flat.delete_loci_smaller_than_and_output_matrix_to_file(end_locus, self.matrix, locus_list, locus_list_deleted, cnst.const['out_matrix_filename'])
            if self.dynamic_delete:
                flat.print_log_msg('Deleting loci not required any more')
                # The deletion threshold is the start of the next partition
                # rather than end_locus (the midpoint), except on the last
                # partition.
                if p_num + 1 < len(self.partitions):
                    delete_loc = self.partitions[p_num +
                                                 1][0]  # diag - specific
                else:
                    delete_loc = end_locus

                flat.delete_loci_smaller_than(delete_loc, self.matrix,
                                              self.locus_list,
                                              self.locus_list_deleted)
            else:
                flat.print_log_msg('locus_list size: ' +
                                   repr(len(self.locus_list)))

        # NOTE(review): start_locus / start_locus_index are only bound once a
        # locus >= snp_first has been found, and end_locus_index only on the
        # last-partition branch. If the last partition is skipped by the
        # `continue` above, these assignments can fail on an unbound local -
        # confirm whether that can happen with real inputs.
        self.start_locus = start_locus
        self.start_locus_index = start_locus_index
        self.end_locus = end_locus
        self.end_locus_index = end_locus_index

        self.calculation_complete = True
Example #18
0
    def generate_img(self, img_full_path, marked_snp=None):
        """Render the squared correlation coefficients as an image file.

        Builds a square grid covering ``self.start_locus_index`` through
        ``self.end_locus_index`` and fills each cell with the square of the
        ``'corr_coeff'`` value stored in ``self.matrix``. Only loci within
        ``[self.snp_first, self.snp_last]`` are used. The grid is drawn with
        ``pcolormesh`` and saved to ``img_full_path``.

        Args:
            img_full_path: Destination passed to ``pyplot.savefig``.
            marked_snp: Optional locus to mark with a green 'x' on the
                diagonal. It must be one of the plotted loci.

        Raises:
            Exception: If the calculation has not completed, if the matrix was
                dynamically deleted, or if the matrix is empty.
            ValueError: If a locus from the matrix is missing from
                ``self.locus_list``, or ``marked_snp`` is not a plotted locus.
        """
        import numpy as np
        import matplotlib as mpl
        # Select the Agg backend before pyplot is imported.
        mpl.use('Agg')
        import matplotlib.pyplot as pt
        mpl.rcParams.update({'font.size': 22})

        if not self.calculation_complete:
            raise Exception(
                'Error: Calculation has not been completed prior to image generation'
            )

        if self.dynamic_delete:
            raise Exception(
                'Error: The matrix was dynamically deleted - cannot generate full image!'
            )

        if len(self.matrix) <= 0:
            raise Exception('Error: The matrix is emmpty or erroneous')

        flat.print_log_msg('Image init')

        plot_mtrx_size = self.end_locus_index - self.start_locus_index + 1
        plot_mtrx = [[0 for _ in range(plot_mtrx_size)]
                     for _ in range(plot_mtrx_size)]

        flat.print_log_msg('Plot matrix size: ' + str(plot_mtrx_size))

        flat.print_log_msg('Matrix size: ' + str(len(self.matrix)))

        flat.print_log_msg('locus_list size: ' + str(len(self.locus_list)))

        flat.print_log_msg('locus_list_deleted size: ' +
                           str(len(self.locus_list_deleted)))

        x_values = [0 for _ in range(plot_mtrx_size)]

        # Position of the first occurrence of each locus, built once. This
        # replaces a linear list.index() scan for every matrix cell.
        first_pos = {}
        for pos, locus in enumerate(self.locus_list):
            first_pos.setdefault(locus, pos)

        def plot_index(locus):
            """Return the row/column of *locus* in the plot grid."""
            pos = first_pos.get(locus)
            if pos is None:
                # Unknown locus: defer to list.index so that the same
                # ValueError as before is raised.
                pos = self.locus_list.index(locus)
            return pos - self.start_locus_index

        flat.print_log_msg('Generating image data')
        for loc_i in self.matrix:
            if not (loc_i >= self.snp_first and loc_i <= self.snp_last):
                continue

            row = plot_index(loc_i)
            x_values[row] = loc_i

            for loc_j, cell in self.matrix[loc_i]['data'].items():
                if not (loc_j >= self.snp_first and loc_j <= self.snp_last):
                    continue

                if 'corr_coeff' not in cell:
                    flat.print_log_msg("No 'corr_coef' key at: " +
                                       str(loc_i) + ' ' + str(loc_j))
                    continue

                col = plot_index(loc_j)
                try:
                    plot_mtrx[row][col] = (cell['corr_coeff'])**2
                except IndexError:
                    # Diagnostic output for a cell outside the plot grid.
                    print(row)
                    print(len(plot_mtrx))
                    print(col)
                    print(len(plot_mtrx[row]))

        flat.print_log_msg('Writing image file...')

        # Same current figure that pcolormesh() draws on below.
        fig = pt.gcf()

        pt.pcolormesh(np.array(plot_mtrx), cmap='binary', vmin=0, vmax=1)

        pt.colorbar()

        if marked_snp is not None:
            bpoint_loc = x_values.index(marked_snp)

            pt.scatter((bpoint_loc), (bpoint_loc), marker='x', color='green')

            flat.print_log_msg('SNP: ' + repr(marked_snp) + ' @ index: ' +
                               repr(bpoint_loc) + ' in graph')

        fig.set_size_inches((40, 30))

        pt.xlabel('SNP #')
        pt.ylabel('SNP #')
        pt.title('Correlation coefficient squared matrix')

        pt.savefig(img_full_path)
Example #19
0
    def calc_vert(self, dynamic_delete=True, sum_both_sides=True):
        """Deprecated column-wise calculation; always raises on entry.

        The legacy implementation is kept below the ``raise`` for reference
        only and is never executed.

        Args:
            dynamic_delete: Unused while the method is disabled.
            sum_both_sides: Unused while the method is disabled.

        Raises:
            Exception: Always, with a message stating that the method is
                deprecated.
        """
        raise Exception('calc_vert is deprecated - check code before running!')

        # ------------------------------------------------------------------
        # Unreachable legacy implementation (check before re-enabling).
        # ------------------------------------------------------------------
        self.dynamic_delete = dynamic_delete

        flat.print_log_msg('Start')

        for p_num, p in enumerate(self.partitions):
            flat.print_log_msg('Reading partition: ' + str(p))
            flat.read_partition_into_matrix(self.partitions, p_num,
                                            self.matrix, self.locus_list,
                                            self.name, self.input_config,
                                            self.snp_first, self.snp_last)

            # Both branches of the first-locus lookup need a non-empty list.
            if not len(self.locus_list) > 0:
                raise Exception('Error: locus_list seems to be empty')

            # First locus: in the first partition it is the first locus
            # >= snp_first, afterwards simply the head of the list.
            curr_locus = -1
            if p_num == 0:
                for idx, candidate in enumerate(self.locus_list):
                    if candidate >= self.snp_first:
                        curr_locus = start_locus = candidate
                        curr_locus_index = start_locus_index = idx
                        break
            else:
                curr_locus_index = 0
                curr_locus = self.locus_list[curr_locus_index]

            if curr_locus < 0:
                raise Exception('Error: curr_locus not found!')

            # End locus: start of the next partition, or on the last
            # partition the last locus <= snp_last.
            if p_num + 1 >= len(self.partitions):
                for idx in range(len(self.locus_list) - 1, -1, -1):
                    if self.locus_list[idx] <= self.snp_last:
                        end_locus = self.locus_list[idx]
                        end_locus_index = idx
                        break
            else:
                end_locus = self.partitions[p_num + 1][0]
                end_locus_index = -1

            flat.print_log_msg('Running for partition: ' + str(p))
            # The very last SNP of the complete range is not included.
            while curr_locus < end_locus:
                for key, el in self.matrix[curr_locus]['data'].items():
                    numerator = self.matrix[curr_locus]['data'][key]['shrink']
                    own_diag = self.matrix[curr_locus]['data'][curr_locus][
                        'shrink']
                    key_diag = self.matrix[key]['data'][key]['shrink']
                    corr_coeff = numerator / math.sqrt(own_diag * key_diag)

                    self.add_corr_coeff(corr_coeff, curr_locus)
                    if sum_both_sides:
                        self.add_corr_coeff(corr_coeff, key)

                    # Keep the coefficient in the matrix cell as well.
                    self.matrix[curr_locus]['data'][key][
                        'corr_coeff'] = corr_coeff

                if not curr_locus_index + 1 < len(self.locus_list):
                    flat.print_log_msg('curr_locus_index out of bounds')
                    break
                curr_locus_index += 1
                curr_locus = self.locus_list[curr_locus_index]

            if self.dynamic_delete:
                flat.print_log_msg('Deleting loci not required any more')
                flat.delete_loci_smaller_than(end_locus, self.matrix,
                                              self.locus_list,
                                              self.locus_list_deleted)

        self.start_locus = start_locus
        self.start_locus_index = start_locus_index
        self.end_locus = end_locus
        self.end_locus_index = end_locus_index

        self.calculation_complete = True
Example #20
0
def print_metric(metric_out):
    """Log the sum, the count and their quotient held in *metric_out*.

    metric_out is indexed with the keys 'sum' and 'N_zero'; the value
    logged as 'Metric' is metric_out['sum'] / metric_out['N_zero'].
    """
    # The two raw values are reported first, each under its own label.
    for label, key in (('Sum: ', 'sum'), ('N (w/ zero\'s): ', 'N_zero')):
        flat.print_log_msg(label + repr(metric_out[key]))

    quotient = metric_out['sum'] / metric_out['N_zero']
    flat.print_log_msg('Metric: ' + repr(quotient))
Example #21
0
    def search(self):
        """Look for a locus near the initial breakpoint with a smaller metric (sum / N).

        The search starts from self.total_sum / self.total_N and walks the
        ordered locus list first to the right of the initial breakpoint and
        then to the left, updating the running sum and N incrementally from
        the precomputed per-locus 'sum_horiz' / 'sum_vert' values.

        If self.init_complete is not set, self.init_search() is run first.
        Sets self.search_complete to True when the scan finishes.

        Returns:
            A tuple (min_breakpoint, min_metric_details).
            min_breakpoint is the accepted locus, or None if no candidate
            was accepted. min_metric_details is a dict with the keys 'sum'
            and 'N_zero' for the best metric seen (initially the totals).
            If looking up the indices of snp_bottom / snp_top raises, the
            tuple (self.breakpoints[self.initial_breakpoint_index], None)
            is returned instead.
        """
        print("----- Running search")
        if not self.init_complete:
            flat.print_log_msg('init_search() must be run before search(). Starting automatically...')
            self.init_search()

        flat.print_log_msg('Starting local search...')

        # Debug output: size of the precomputed data and the ends of the locus list.
        print("addy", len(self.precomputed['data']))

        print("len(locus_list)", len(self.precomputed["locus_list"]))
        print("locus_list", self.precomputed["locus_list"][:5], self.precomputed["locus_list"][-5:])
        # snp_bottom / snp_top need not be present in locus_list themselves, so
        # their positions are looked up through the binsrch helpers.
        # NOTE(review): presumably find_ge_ind returns the index of the first
        # element >= value and find_le_ind the index of the last element
        # <= value -- confirm against binsrch.
        try:
            print("hiihihih", self.snp_bottom, self.snp_top)
            snp_bottom_ind = binsrch.find_ge_ind(self.precomputed['locus_list'], self.snp_bottom)
            snp_top_ind = binsrch.find_le_ind(self.precomputed['locus_list'], self.snp_top)
        except Exception as e:
            # Best effort: dump the full state to the log and hand back the
            # unchanged initial breakpoint instead of failing.
            flat.print_log_msg('Error2!')
            flat.print_log_msg(repr(e))
            flat.print_log_msg('self.precomputed[\'locus_list\']: '+repr(self.precomputed['locus_list']))
            flat.print_log_msg('self.snp_bottom: '+repr(self.snp_bottom))
            flat.print_log_msg('self.snp_first: '+repr(self.snp_first))
            flat.print_log_msg('self.snp_last: '+repr(self.snp_last))
            flat.print_log_msg('self.snp_top: '+repr(self.snp_top))
            flat.print_log_msg('self.__dict__: '+repr(self.__dict__))
            flat.print_log_msg('Continuing...')
            return self.breakpoints[self.initial_breakpoint_index], None


        # Debug output of the search bounds and their indices.
        print("self.snp_bottom", self.snp_bottom)
        print("self.snp_top", self.snp_top)
        print("self.initial_breakpoint_index", self.initial_breakpoint_index)
        print("snp_bottom_ind", snp_bottom_ind)
        print("snp_top_ind", snp_top_ind)

        # Old (exact lookup, required the values to be present in locus_list):
        # snp_first_ind = self.precomputed['locus_list'].index(self.snp_first) # This should be snp_bottom
        # snp_top_ind = self.precomputed['locus_list'].index(self.snp_top)

        # Start from the initial breakpoint and search right, then start from the
        # initial breakpoint again and search left. The initial breakpoint is the
        # starting point because total_sum / total_N are the values we have for
        # it, so the precomputed data can be used to update them incrementally.
        # A breakpoint does not necessarily have to be in locus_list, hence the
        # index lookup through binsrch rather than list.index().
        breakpoint_index_in_locus_list = binsrch.find_le_ind(self.precomputed['locus_list'], self.breakpoints[self.initial_breakpoint_index])
        # Note: the label says "breakpoint_index_in_locus_list" but the value
        # printed is the length of locus_list.
        print("breakpoint_index_in_locus_list", len(self.precomputed["locus_list"]))
        init_breakpoint_locus = self.precomputed['locus_list'][breakpoint_index_in_locus_list]
        # Old:
        # breakpoint_index_in_locus_list = self.precomputed['locus_list'].index(self.breakpoints[self.initial_breakpoint_index])

        # Running values, updated once per visited locus.
        curr_sum = self.total_sum
        curr_N = self.total_N
        print("curr_sum", curr_sum)
        print("curr_N", curr_N)

        # Best metric so far: the metric at the initial breakpoint.
        min_metric = decimal.Decimal(self.total_sum) / decimal.Decimal(self.total_N)
        min_breakpoint = None

        min_metric_details = {}
        min_metric_details['sum'] = self.total_sum
        min_metric_details['N_zero'] = self.total_N
        min_distance_right = 0 # because the initial distance of the minimum actually is 0! (until we find a new minimum to the RIGHT, or we don't, in which case it doesn't matter)


        # Go RIGHT!
        flat.print_log_msg('Searching right...')
        if breakpoint_index_in_locus_list+1 < len(self.precomputed['locus_list']):
            curr_loc_ind = breakpoint_index_in_locus_list+1
            curr_loc = self.precomputed['locus_list'][curr_loc_ind]

            # Visit loci up to and including snp_last.
            while curr_loc <= self.snp_last:
                # Moving right: subtract the locus' horizontal sum, add its vertical sum.
                curr_sum = curr_sum - self.precomputed['data'][curr_loc]['sum_horiz'] + self.precomputed['data'][curr_loc]['sum_vert']

                # Element counts derived from index distances to the bottom and
                # top bounds; they adjust N the same way the sums adjust curr_sum.
                horiz_N = curr_loc_ind-snp_bottom_ind-1
                vert_N = snp_top_ind-curr_loc_ind
                curr_N = curr_N - horiz_N + vert_N

                curr_metric = decimal.Decimal(curr_sum) / decimal.Decimal(curr_N)

                # Only a strictly smaller metric replaces the current minimum.
                if curr_metric < min_metric:
                    min_metric = curr_metric
                    min_breakpoint = curr_loc
                    min_metric_details['sum'] = curr_sum
                    min_metric_details['N_zero'] = curr_N
                    # Remembered so the left search can break ties by distance.
                    min_distance_right = curr_loc - init_breakpoint_locus


                if curr_loc_ind+1 < len(self.precomputed['locus_list']):
                    curr_loc_ind += 1
                    curr_loc = self.precomputed['locus_list'][curr_loc_ind]
                else:
                    flat.print_log_msg('curr_locus_index out of bounds') # The possibility of this happening is only at the end of the chromosome (end of last partition)
                    break
        else:
            flat.print_log_msg('Warning: breakpoint_index_in_locus_list+1 < len(self.precomputed["locus_list"]) not satisfied!')
            flat.print_log_msg('Breakpoints: '+repr(self.breakpoints))
            flat.print_log_msg('Locus_list: '+repr(self.precomputed['locus_list']))
            flat.print_log_msg('breakpoint_index_in_locus_list: '+ repr(breakpoint_index_in_locus_list))

        print("min_metric", min_metric, min_breakpoint, min_distance_right)

        # Reset the running values before searching left
        curr_sum = self.total_sum
        curr_N = self.total_N

        # Go LEFT!
        flat.print_log_msg('Searching left...')
        if breakpoint_index_in_locus_list-1 >= 0:
            curr_loc_ind = breakpoint_index_in_locus_list-1
            curr_loc = self.precomputed['locus_list'][curr_loc_ind]

            # (Same reset as just above; repeated here.)
            curr_sum = self.total_sum
            curr_N = self.total_N

            while curr_loc > self.snp_first: # Don't include previous breakpoint!
                # Moving left: the signs are mirrored relative to the right search.
                curr_sum = curr_sum + self.precomputed['data'][curr_loc]['sum_horiz'] - self.precomputed['data'][curr_loc]['sum_vert']

                horiz_N = curr_loc_ind-snp_bottom_ind-1
                vert_N = snp_top_ind-curr_loc_ind
                curr_N = curr_N + horiz_N - vert_N

                curr_metric = decimal.Decimal(curr_sum) / decimal.Decimal(curr_N)

                # Accept a strictly smaller metric, or an equal metric whose
                # distance from the initial breakpoint is smaller than that of
                # the minimum found on the right.
                if (curr_metric < min_metric) or (curr_metric == min_metric and (init_breakpoint_locus - curr_loc)<min_distance_right): # min_distance_right is used to compare to RIGHT metric, not within LEFT metric!
                    min_metric = curr_metric
                    min_breakpoint = curr_loc
                    min_metric_details['sum'] = curr_sum
                    min_metric_details['N_zero'] = curr_N

                if curr_loc_ind-1 >= 0:
                    curr_loc_ind -= 1
                    curr_loc = self.precomputed['locus_list'][curr_loc_ind]
                else:
                    flat.print_log_msg('curr_locus_index out of bounds') # The possibility of this happening is only at the beginning of the chromosome (start of first partition)
                    break
        else:
            flat.print_log_msg('Warning: breakpoint_index_in_locus_list-1 >=0 not satisfied!')
            flat.print_log_msg('Breakpoints: '+repr(self.breakpoints))
            flat.print_log_msg('Locus_list: '+repr(self.precomputed['locus_list']))
            flat.print_log_msg('breakpoint_index_in_locus_list: '+ repr(breakpoint_index_in_locus_list))

        self.search_complete = True

        flat.print_log_msg('Search done')

        return min_breakpoint, min_metric_details
Example #22
0
    def __init__(self, name, start_search, stop_search, initial_breakpoint_index, breakpoints, total_sum, total_N, input_config):
        """Prepare a local search around ``breakpoints[initial_breakpoint_index]``.

        Stores the arguments, creates empty matrix / precomputed containers,
        validates the search window and derives the boundary loci snp_first,
        snp_last, snp_top and snp_bottom together with the partitions that
        span snp_bottom..snp_top.

        Raises:
            Exception: if start_search >= stop_search, the breakpoint index is
                out of range, the breakpoint is not strictly between
                start_search and stop_search, either end of the window lies
                outside the partitions returned by
                flat.get_final_partitions(), or the window reaches past a
                neighboring breakpoint.
        """
        # Precision of the current decimal context, used by the Decimal metric arithmetic.
        decimal.getcontext().prec = 50

        print(" *** start_search", start_search)
        print(" *** stop_search", stop_search)

        # Plain copies of the constructor arguments.
        self.name = name
        self.start_search = start_search
        self.stop_search = stop_search
        self.initial_breakpoint_index = initial_breakpoint_index
        self.breakpoints = breakpoints
        self.total_sum = total_sum
        self.total_N = total_N
        self.input_config = input_config

        # Empty working containers.
        self.matrix = {}
        self.locus_list = []
        self.locus_list_deleted = []

        # 'locus_list' keeps the ordering of loci for efficient iteration,
        # 'data' allows ~O(1) access to each element by its locus.
        self.precomputed = {'locus_list': [], 'data': {}}

        self.dynamic_delete = True
        self.init_complete = False
        self.search_complete = False

        # --- Argument validation -------------------------------------------
        if start_search >= stop_search:
            raise Exception('Error: start_search >= stop_search')

        if initial_breakpoint_index >= len(breakpoints) or initial_breakpoint_index < 0:
            raise Exception('Error: initial_breakpoint_index index out of bounds')

        centre_breakpoint = breakpoints[initial_breakpoint_index]
        if centre_breakpoint >= stop_search:
            raise Exception('Error: breakpoint >= stop_search')
        if centre_breakpoint <= start_search:
            raise Exception('Error: breakpoint <= start_search')

        window_partitions = flat.get_final_partitions(self.input_config, self.name, start_search, stop_search)
        first_partition = window_partitions[0]
        last_partition = window_partitions[-1]

        if start_search < first_partition[0] or start_search > last_partition[1]:
            raise Exception('Error: start_search is out of bounds')

        if stop_search < first_partition[0] or stop_search > last_partition[1]:
            raise Exception('Error: stop_search is out of bounds')

        # The window may not reach past the neighboring breakpoints. When there
        # is no neighbor on a side, the partition bounds check above covers it.
        if initial_breakpoint_index > 0 and start_search < breakpoints[initial_breakpoint_index - 1]:
            raise Exception('Error: start_search cannot be further than a neighboring breakpoint')

        if initial_breakpoint_index < (len(breakpoints) - 1) and stop_search > breakpoints[initial_breakpoint_index + 1]:
            raise Exception('Error: stop_search cannot be further than a neighboring breakpoint')

        # --- Boundary loci --------------------------------------------------
        # snp_first / snp_last are taken directly from the requested window.
        self.snp_first = start_search
        flat.print_log_msg('snp_first: ' + repr(self.snp_first))

        self.snp_last = stop_search
        flat.print_log_msg('snp_last: ' + repr(self.snp_last))

        # Upper border of the search space: the next breakpoint, or the end of
        # the last partition when this is the last breakpoint.
        following_index = initial_breakpoint_index + 1
        self.snp_top = breakpoints[following_index] if following_index < len(breakpoints) else last_partition[1]
        flat.print_log_msg('snp_top: ' + repr(self.snp_top))

        # Bottom border of the search space: the previous breakpoint, or the
        # start of the first partition when this is the first breakpoint.
        preceding_index = initial_breakpoint_index - 1
        self.snp_bottom = breakpoints[preceding_index] if preceding_index >= 0 else first_partition[0]
        flat.print_log_msg('snp_bottom: ' + repr(self.snp_bottom))

        # Data must be read until snp_top!
        self.partitions = flat.get_final_partitions(self.input_config, self.name, self.snp_bottom, self.snp_top)

        # -1 marks "not determined yet".
        self.start_locus = self.start_locus_index = self.end_locus = self.end_locus_index = -1
Example #23
0
    def init_search_lean(self):
        """Read the partitions between snp_bottom and snp_top and fill self.precomputed.

        Partitions lying entirely before snp_bottom's partition are pre-read
        into self.matrix first. Each remaining partition is then read, every
        locus from the current one up to that partition's end locus is
        registered via add_locus_to_precomputed(), and squared correlation
        coefficients are accumulated via add_val_to_precomputed(). After each
        partition, loci that are no longer needed are deleted from the matrix.

        On completion sets self.start_locus, self.start_locus_index,
        self.end_locus, self.end_locus_index and self.init_complete.

        Raises:
            Exception: if self.dynamic_delete is False, or if self.locus_list
                is empty when a current locus has to be determined.
        """
        if not self.dynamic_delete:
            raise Exception('Error: dynamic_delete should be True for local search!') 

        flat.print_log_msg('Start local search init') 
        print("self.partitions", self.partitions)

        # pre-read all relevant partitions at beginning!
        # A partition is pre-read as long as snp_bottom is at or beyond the
        # start of the partition that follows it; last_p_num records the last
        # partition handled this way (-1 if none).
        last_p_num = -1
        for p_num_init in range(0, len(self.partitions)-1):
            print("p_num_init", p_num_init)
            if self.snp_bottom >= self.partitions[p_num_init+1][0]:
                flat.print_log_msg('Pre-reading partition: '+str(self.partitions[p_num_init])) 
                flat.read_partition_into_matrix_lean(self.partitions, p_num_init, self.matrix, self.locus_list, self.name, self.input_config, self.snp_bottom, self.snp_top)
                last_p_num = p_num_init
            else:
                break

        print("===", self.snp_bottom, self.snp_top, self.snp_first, self.snp_last)

        # curr_locus < 0 means "first locus not determined yet".
        curr_locus = -1
        # Continue with the partitions that were not pre-read.
        for p_num in range(last_p_num+1, len(self.partitions)):
            print("p_num", p_num)
            p = self.partitions[p_num]

            flat.print_log_msg('Reading partition: '+str(p))
            # Data must be read until snp_top!
            flat.read_partition_into_matrix_lean(self.partitions, p_num, self.matrix, self.locus_list, self.name, self.input_config, self.snp_bottom, self.snp_top)

            # Determine first locus
            if curr_locus<0: # Either first partition or not found in first partition
                # curr_locus is expected to still be -1 here (set before the main for loop)
                if len(self.locus_list)>0:
                    # Find first locus >= snp_bottom
                    for i, locus in enumerate(self.locus_list):
                        if locus >= self.snp_bottom:
                            curr_locus = locus
                            start_locus = locus
                            curr_locus_index = i
                            start_locus_index = i
                            break
                else:
                    raise Exception('Error: locus_list seems to be empty') 
            else:
                try:
                    curr_locus_index = self.locus_list.index(curr_locus)
                    # curr_locus is carried from prev iteration, but index has changed since part of matrix (and locus_list) has been deleted
                except ValueError:
                    # The carried-over locus is no longer in the list: restart
                    # from the first locus that is still present.
                    if len(self.locus_list)>0:
                        curr_locus = self.locus_list[0]
                        curr_locus_index = 0
                    else:
                        raise Exception('Error: locus_list seems to be empty')


            if curr_locus<0:
                flat.print_log_msg('Warning: curr_locus not found! Continuing to next partition.')
                flat.print_log_msg('Comment: This is possibly due to snp_bottom being very close to end of partition.')
                flat.print_log_msg('Details: ')
                flat.print_log_msg('Partition: '+repr(p))
                flat.print_log_msg('snp_bottom: '+repr(self.snp_bottom))
                flat.print_log_msg('curr_locus: '+repr(curr_locus)) 
                continue #continue to next partition

            print("self.snp_last", self.snp_last)
            if p_num+1 < len(self.partitions):
                # Not the last partition: process up to the start of the next
                # partition; the index of that locus is not looked up (-1).
                end_locus = self.partitions[p_num+1][0]
                end_locus_index = -1
            else:
                # Last partition: find last locus <= snp_last
                end_locus_found = False
                for i in reversed(range(0, len(self.locus_list))):
                    if self.locus_list[i] <= self.snp_last:
                        end_locus = self.locus_list[i]
                        end_locus_index = i
                        end_locus_found = True
                        break

                # No locus <= snp_last: fall back to the first locus in the list.
                if not end_locus_found:
                    end_locus_index = 0
                    end_locus = self.locus_list[end_locus_index]

            flat.print_log_msg('Running precompute for partition: '+str(p))

            flat.print_log_msg('start_locus: '+repr(start_locus)+' end_locus: '+repr(end_locus)+' end_locus_index '+repr(end_locus_index))
            print("checking that curr_locus is smaller than", curr_locus, end_locus, "oooo")

            # Loci up to and including end_locus are processed.
            while curr_locus <= end_locus:                     
                self.add_locus_to_precomputed(curr_locus) # We want snp_bottom to be added here always (for later use). Same thing for snp_top

                if (curr_locus > self.snp_first or self.initial_breakpoint_index == 0) and (curr_locus <= self.snp_last): # Do not include snp_first in the calculation unless the very first block is being taken into account. Do not calculate anything above snp_last, just insert dummies
                    for key, el in self.matrix[curr_locus].items():
                        # don't take into account anything over snp_top
                        if key <= self.snp_top:                        
                            # Matrix entry normalised by the square root of the
                            # product of the two diagonal entries.
                            corr_coeff = self.matrix[curr_locus][key] / math.sqrt( self.matrix[curr_locus][curr_locus] * self.matrix[key][key] )

                            # The diagonal is deliberately not excluded; per the
                            # original author it is added and subtracted exactly
                            # once later on, so it does not matter.
                            self.add_val_to_precomputed(decimal.Decimal(corr_coeff**2), curr_locus, key)
                else:
                    self.add_val_to_precomputed(decimal.Decimal(0), curr_locus, curr_locus) # Dummy value for snp_first! ...in order to be consistent for some other future use of these data structures

                if curr_locus_index+1 < len(self.locus_list):
                    curr_locus_index+=1
                    curr_locus = self.locus_list[curr_locus_index]
                else:
                    # NOTE(review): len() is applied to the 'sum_horiz' entry
                    # here. If that entry is a numeric accumulator (search()
                    # uses it in arithmetic), this line would raise TypeError
                    # instead of logging -- confirm.
                    flat.print_log_msg('curr_locus_index out of bounds' + str(len(self.precomputed['data'][curr_locus]['sum_horiz']))) # The possibility of this happening is only at the end of the range [usually chromosome] (end of last partition)
                    break

            print("len(self.locus_list)", len(self.locus_list))

            if self.dynamic_delete:
                flat.print_log_msg('Deleting loci not required any more')
                flat.delete_loci_smaller_than_leanest(end_locus, self.matrix, self.locus_list)


        # NOTE(review): start_locus / start_locus_index / end_locus /
        # end_locus_index are only bound inside the loop above. If every
        # partition is skipped through the 'continue' (or there are no
        # partitions to iterate), these names would be unbound here -- confirm
        # that this cannot happen.
        self.start_locus = start_locus
        self.start_locus_index = start_locus_index
        self.end_locus = end_locus
        self.end_locus_index = end_locus_index

        self.init_complete = True