def randomize(self):
    """Fill this object with random content.

    Builds a fresh randomized annotation, regenerates the name, and
    regenerates between ``field_min_number`` and ``field_max_number``
    identifiers (bounds inclusive, read from the instance).
    """
    ann = annotation()
    ann.randomize()
    self.annotation = ann
    self.name = self.generate_name()
    id_count = randomizer.randint(self.field_min_number, self.field_max_number)
    self.identifiers = self.generate_identifiers(id_count)
def randomize(self, headers):
    """Fill this object with random content.

    Records the randomize usage, builds a fresh randomized annotation,
    regenerates the name, and keeps a random subset (possibly empty,
    possibly all) of *headers*.

    :param headers: candidate header collection to sample from
    """
    common.usedRandomize()
    ann = annotation()
    ann.randomize()
    self.annotation = ann
    self.name = self.generate_name()
    subset_size = randomizer.randint(0, len(headers))
    self.headers = randomizer.sample(headers, subset_size)
def load_annotation_file(self, annotation_file):
    """Parse an XML annotation file into annotation objects.

    The file layout is VOC-like: a root holding <folder>, <filename>,
    <size> (with <width>/<height>) and one <object> per bounding box,
    each with a <bndbox> containing xmin/xmax/ymin/ymax.

    Boxes larger than kMaxRatio of the display size, and degenerate or
    out-of-bounds boxes, are skipped.

    :param annotation_file: path to the XML annotation file
    :returns: tuple (list of annotation objects, number of annotations)
    """
    parsed = []
    root = ET.parse(annotation_file).getroot()
    folder = root.find('folder').text
    filename = root.find('filename').text
    size_node = root.find('size')
    disp_width = int(size_node.find('width').text)
    disp_height = int(size_node.find('height').text)
    for obj in root.findall('object'):
        box = obj.find('bndbox')
        xmin = int(box.find('xmin').text)
        xmax = int(box.find('xmax').text)
        ymin = int(box.find('ymin').text)
        ymax = int(box.find('ymax').text)
        too_wide = (xmax - xmin) > (kMaxRatio * disp_width)
        too_tall = (ymax - ymin) > (kMaxRatio * disp_height)
        if too_wide or too_tall:
            continue
        if xmin < 0 or ymin < 0 or xmax <= xmin or ymax <= ymin:
            continue
        obj_annotation = annotation()
        obj_annotation.setbbox(xmin, xmax, ymin, ymax)
        obj_annotation.setWidthHeight(disp_width, disp_height)
        obj_annotation.setImagePath(os.path.join(folder, filename))
        parsed.append(obj_annotation)
    return parsed, len(parsed)
logger.info('A extraction of information is done.') ################# R1 : get sample information ######## (sample_dic, sample_list, normal_list, tumor_list, paired_list, unpaired_list) = get_sample_list(sample_file) print sample_dic file_dic = get_file_name(options.basecall, sample_list) print_sample(sample_list, normal_list, tumor_list, paired_list, unpaired_list) ############## Generation of picard object picard = picard(params, options.basecall, file_dic) bwa = bwa(params, file_dic) gatk = gatk(params, options.basecall, file_dic) somatic = somatic(params, file_dic) breakmer = breakmer(options.basecall, file_dic) anno = annotation(params, file_dic) ################ R2 : Extract Illumina Barcodes and Illumina Basecalls To Sam #################### job_id = get_start_jobid() cmd = [] cmd.append(picard.ExtractIlluminaBarcodes()) cmd.append(picard.IlluminaBasecallsToSam()) cmd2 = '\n'.join(cmd) file_name = '01.BaseCall.%s' % job_id wfile = open('%s/%s.sh' % (log_dir, file_name), 'w') wfile.write('%s\n' % pbs_header1(file_name, cpu_no['basecall'], cmd2)) wfile.close() cmd3 = 'qsub %s/%s.sh' % (log_dir, file_name)
import heatmap
import annotation
import volcano_plot
import scatter_and_violin_plots

# Driver script: re-generates the artifacts already stored in the
# output_file; running it reproduces those outputs.

# Render the heatmap.
print(heatmap.heatmap())

# Produce a new csv file, results_L-vs-NL_full_annotated, inside the
# Final Project folder.  This step is slow because it needs to query.
print(annotation.annotation("results_L-vs-NL_full.csv"))

# Render the volcano plot.
print(volcano_plot.volcano_plot())

# Render the scatter plot.
print(scatter_and_violin_plots.scatterplot())

# Render the violin plot.
print(scatter_and_violin_plots.violin_plot())
def randomize(self):
    """Append a random number of freshly randomized annotations.

    The count is drawn uniformly from
    [``min_list_size``, ``max_list_size``] (inclusive) and the new
    items are appended to ``self.annotation_list``.
    """
    count = randomizer.randint(self.min_list_size, self.max_list_size)
    for _ in range(count):
        item = annotation()
        item.randomize()
        self.annotation_list.append(item)
################# R1 : get sample information ######## (sample_dic, sample_list, normal_list, tumor_list, paired_list, unpaired_list) = get_sample_list(sample_file) print sample_dic file_dic = get_file_name(options.basecall, sample_list) print_sample(sample_list, normal_list, tumor_list, paired_list, unpaired_list) ############## Generation of picard object picard = picard(params, options.basecall, file_dic) bwa = bwa(params, file_dic) gatk = gatk(params, options.basecall, file_dic) somatic = somatic(params, file_dic) breakmer = breakmer(options.basecall, file_dic) anno = annotation(params, file_dic) ################ R2 : Extract Illumina Barcodes and Illumina Basecalls To Sam #################### job_id = get_start_jobid() cmd = [] cmd.append(picard.ExtractIlluminaBarcodes()) cmd.append(picard.IlluminaBasecallsToSam()) cmd2 = '\n'.join(cmd) file_name = '01.BaseCall.%s' % job_id wfile = open('%s/%s.sh' % (log_dir, file_name),'w') wfile.write('%s\n' % pbs_header1(file_name, cpu_no['basecall'], cmd2)) wfile.close()
def main(args):
    """Run the FindSV pipeline for every configured project.

    Reads project description files from <programDirectory>/projects,
    then for each selected project: detects bam files, calls variants,
    combines the results, builds a frequency database, filters,
    annotates, and finally cleans the resulting vcf files.

    :param args: parsed command-line options; only ``args.project``
                 (optional manual project selection) is read here.
    """
    programDirectory = os.path.dirname(os.path.abspath(__file__))

    # Read the project files (projects/*.txt, skipping the example file).
    projects = {}
    for project_file in os.listdir(os.path.join(programDirectory, "projects")):
        if project_file.endswith(".txt") and not project_file.endswith("example.txt"):
            with open(os.path.join(programDirectory, "projects", project_file)) as ongoing_fd:
                # BUGFIX: the basename from split("/") used to be computed
                # and then discarded by a second assignment; chain the two
                # steps (os.listdir yields basenames, so behavior is
                # unchanged when no "/" is present).
                projectID = project_file.split("/")[-1].replace(".txt", "")
                # If the user selected a project manually and it is not this
                # one, there is nothing to do for this file.
                if not (args.project and (args.project != projectID)):
                    projects[projectID] = {}
                    for line in ongoing_fd:
                        try:
                            if line[0] != "#":
                                info = line.strip()
                                info = info.split("\t")
                                projects[projectID][info[0]] = info[1:]
                        except Exception:
                            # Best-effort parsing: stray newlines etc. in the
                            # project file must not crash the pipeline.
                            # (Was a bare except; Exception no longer swallows
                            # KeyboardInterrupt/SystemExit.)
                            pass

    # Read the config file.
    (working_dir, available_tools, account, exclude,
     modules, recursive) = readConfigFile.readConfigFile(programDirectory)
    path_to_bam = ""
    default_working_dir = working_dir
    for project in projects:
        # Initiate the project parameters based on the project dictionary.
        project_path = projects[project]["bam"]
        projectName = project
        # Set the output, genmod and frequency-db paths, falling back to the
        # program defaults when the project file leaves them empty.
        if not projects[project]["output"]:
            working_dir = default_working_dir
        else:
            working_dir = projects[project]["output"][0]
        if not projects[project]["genmod"]:
            genmod_file = os.path.join(programDirectory, "genmod")
        else:
            genmod_file = projects[project]["genmod"][0]
        if not projects[project]["db"]:
            frequency_db = os.path.join(working_dir, project, "FindSV", "database")
        else:
            frequency_db = projects[project]["db"][0]

        # Create a directory to keep track of the analysed files.
        processFilesPath = os.path.join(working_dir, project, "process")
        if not os.path.exists(processFilesPath):
            os.makedirs(processFilesPath)
        # Initiate the processFiles.
        processFiles = initiateProcessFile(available_tools, processFilesPath)
        # Search for the project's bam files.
        bamfiles = detect_bam_files(project_path, projectName, path_to_bam, recursive)

        # Find variants.
        processFiles = calling.variantCalling(
            programDirectory, project_path, projectName, working_dir,
            path_to_bam, available_tools, account, modules, bamfiles,
            exclude, processFiles, processFilesPath)
        # Combine the results of the variant calling.
        processFiles = combine.combine(programDirectory, processFiles,
                                       processFilesPath, account, bamfiles)
        # Build databases from vcf files.
        processFiles = database.buildDatabase(programDirectory, processFiles,
                                              processFilesPath, account)
        # Filter the variant files and find genomic features of the variants.
        processFiles = filter.applyFilter(programDirectory, processFiles,
                                          processFilesPath, account, frequency_db)
        # Annotate the samples.
        processFiles = annotation.annotation(programDirectory, processFiles,
                                             processFilesPath, account, genmod_file)
        # Clean the vcf file; this is the final step of the pipeline.
        processFiles = cleaning.cleaning(programDirectory, processFiles,
                                         processFilesPath, account)
    return