def randomize(self):
     """Fill this object with a random annotation, name and identifiers.

     A new ``annotation`` is created and randomized, then the name is
     generated, and finally ``generate_identifiers`` is called with a count
     drawn by ``randomizer.randint(field_min_number, field_max_number)``.
     """
     fresh_annotation = annotation()
     fresh_annotation.randomize()
     self.annotation = fresh_annotation
     self.name = self.generate_name()
     # Draw the identifier count first, then build that many identifiers.
     identifier_count = randomizer.randint(self.field_min_number,
                                           self.field_max_number)
     self.identifiers = self.generate_identifiers(identifier_count)
 def randomize(self, headers):
     """Randomize the annotation, the name and the selected headers.

     :param headers: population handed to ``randomizer.sample``; the sample
         size is drawn by ``randomizer.randint(0, len(headers))`` and the
         resulting selection is stored in ``self.headers``.
     """
     common.usedRandomize()
     fresh_annotation = annotation()
     fresh_annotation.randomize()
     self.annotation = fresh_annotation
     self.name = self.generate_name()
     # Pick how many headers to keep, then sample that many of them.
     header_count = randomizer.randint(0, len(headers))
     self.headers = randomizer.sample(headers, header_count)
Esempio n. 3
0
    def load_annotation_file(self, annotation_file):
        """Parse an XML annotation file into a list of annotation objects.

        The ``folder``, ``filename`` and ``size`` (``width``/``height``)
        entries are read from the document root, and every ``object``
        element contributes one bounding box taken from its ``bndbox``
        child. A box is skipped when it is wider or taller than
        ``kMaxRatio`` times the image dimension, when ``xmin`` or ``ymin``
        is negative, or when its extent is not strictly positive.

        :param annotation_file: source handed to ``ET.parse``
        :returns: tuple ``(list_of_annotations, num_annotations)`` where the
            second item is the number of annotations that were kept

        """
        xml_root = ET.parse(annotation_file).getroot()
        folder = xml_root.find('folder').text
        filename = xml_root.find('filename').text
        size_node = xml_root.find('size')
        disp_width = int(size_node.find('width').text)
        disp_height = int(size_node.find('height').text)

        list_of_annotations = []
        for obj in xml_root.findall('object'):
            bbox = obj.find('bndbox')
            xmin, xmax, ymin, ymax = [
                int(bbox.find(tag).text)
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            ]

            # Boxes covering too much of the image are not kept.
            too_large = (xmax - xmin > kMaxRatio * disp_width
                         or ymax - ymin > kMaxRatio * disp_height)
            # Negative origin or empty/inverted extent is not kept either.
            malformed = xmin < 0 or ymin < 0 or xmax <= xmin or ymax <= ymin
            if too_large or malformed:
                continue

            box_annotation = annotation()
            box_annotation.setbbox(xmin, xmax, ymin, ymax)
            box_annotation.setWidthHeight(disp_width, disp_height)
            box_annotation.setImagePath(os.path.join(folder, filename))
            list_of_annotations.append(box_annotation)

        return list_of_annotations, len(list_of_annotations)
Esempio n. 4
0
logger.info('A extraction of information is done.')

################# R1 : get sample information ########
(sample_dic, sample_list, normal_list, tumor_list, paired_list,
 unpaired_list) = get_sample_list(sample_file)
# Parenthesized single-argument form prints the same text on Python 2 and 3.
print(sample_dic)
file_dic = get_file_name(options.basecall, sample_list)
print_sample(sample_list, normal_list, tumor_list, paired_list, unpaired_list)

############## Instantiate the per-tool objects
# NOTE: each assignment rebinds the callable's own name to the new instance,
# so the callables cannot be invoked again under these names afterwards.
picard = picard(params, options.basecall, file_dic)
bwa = bwa(params, file_dic)
gatk = gatk(params, options.basecall, file_dic)
somatic = somatic(params, file_dic)
breakmer = breakmer(options.basecall, file_dic)
anno = annotation(params, file_dic)

################ R2 : Extract Illumina Barcodes and Illumina Basecalls To Sam ####################
job_id = get_start_jobid()

# The two picard commands are written to the job script one per line.
cmd = [
    picard.ExtractIlluminaBarcodes(),
    picard.IlluminaBasecallsToSam(),
]
cmd2 = '\n'.join(cmd)

file_name = '01.BaseCall.%s' % job_id
# The context manager closes the script file even if building the header or
# writing it raises, which the manual open()/close() pair did not guarantee.
with open('%s/%s.sh' % (log_dir, file_name), 'w') as wfile:
    wfile.write('%s\n' % pbs_header1(file_name, cpu_no['basecall'], cmd2))

# Submission command for the script written above.
cmd3 = 'qsub %s/%s.sh' % (log_dir, file_name)
import heatmap
import annotation
import volcano_plot
import scatter_and_violin_plots

# Client script: every step calls one analysis routine and prints whatever
# that routine returns. According to the original author, the results are
# already stored in the output_file, and running this script reproduces them.

# Step 1: heatmap.
heatmap_result = heatmap.heatmap()
print(heatmap_result)

# Step 2: annotation. Per the original note this writes a new csv file,
# results_L-vs-NL_full_annotated, into the Final Project folder, and it is
# slow because it needs to query.
annotation_result = annotation.annotation("results_L-vs-NL_full.csv")
print(annotation_result)

# Step 3: volcano plot.
volcano_result = volcano_plot.volcano_plot()
print(volcano_result)

# Step 4: scatter plot.
scatter_result = scatter_and_violin_plots.scatterplot()
print(scatter_result)

# Step 5: violin plot.
violin_result = scatter_and_violin_plots.violin_plot()
print(violin_result)
 def randomize(self):
     """Append a random number of freshly randomized annotations.

     The count is drawn with ``randomizer.randint(self.min_list_size,
     self.max_list_size)``; each new ``annotation`` is randomized before
     it is appended to ``self.annotation_list``.
     """
     how_many = randomizer.randint(self.min_list_size, self.max_list_size)
     for _ in range(how_many):
         entry = annotation()
         entry.randomize()
         self.annotation_list.append(entry)
Esempio n. 7
0
################# R1 : get sample information ########
(sample_dic, sample_list, normal_list, tumor_list, paired_list,
 unpaired_list) = get_sample_list(sample_file)
# Parenthesized single-argument form prints the same text on Python 2 and 3.
print(sample_dic)
file_dic = get_file_name(options.basecall, sample_list)
print_sample(sample_list, normal_list, tumor_list, paired_list, unpaired_list)


############## Instantiate the per-tool objects
# NOTE: each assignment rebinds the callable's own name to the new instance,
# so the callables cannot be invoked again under these names afterwards.
picard = picard(params, options.basecall, file_dic)
bwa = bwa(params, file_dic)
gatk = gatk(params, options.basecall, file_dic)
somatic = somatic(params, file_dic)
breakmer = breakmer(options.basecall, file_dic)
anno = annotation(params, file_dic)


################ R2 : Extract Illumina Barcodes and Illumina Basecalls To Sam ####################
job_id = get_start_jobid()

# The two picard commands are written to the job script one per line.
cmd = [
    picard.ExtractIlluminaBarcodes(),
    picard.IlluminaBasecallsToSam(),
]
cmd2 = '\n'.join(cmd)

file_name = '01.BaseCall.%s' % job_id
# The context manager closes the script file even if building the header or
# writing it raises, which the manual open()/close() pair did not guarantee.
with open('%s/%s.sh' % (log_dir, file_name), 'w') as wfile:
    wfile.write('%s\n' % pbs_header1(file_name, cpu_no['basecall'], cmd2))
Esempio n. 8
0
def main(args):
    """Run every pipeline step for each project found under ``projects/``.

    Project files are the ``*.txt`` files (except those ending in
    ``example.txt``) in the ``projects`` directory next to this script. Each
    non-comment line is split on tabs: the first field is the setting name
    and the remaining fields are its values. The ``bam`` setting is
    required; ``output``, ``genmod`` and ``db`` are optional and fall back
    to defaults when missing or empty.

    :param args: object whose ``project`` attribute, when truthy, restricts
        the run to the project with that ID
    :raises KeyError: if a project file has no ``bam`` entry
    """
    programDirectory = os.path.dirname(os.path.abspath(__file__))
    projects_dir = os.path.join(programDirectory, "projects")

    # Read the project files.
    projects = {}
    for file_name in os.listdir(projects_dir):
        if not file_name.endswith(".txt") or file_name.endswith("example.txt"):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any ".txt" occurring in the middle of the name.
        projectID = file_name[:-len(".txt")]
        # The user has selected a project manually and it is not this one,
        # so there is nothing to do (and no need to open the file).
        if args.project and args.project != projectID:
            continue

        settings = {}
        with open(os.path.join(projects_dir, file_name)) as ongoing_fd:
            for line in ongoing_fd:
                if line.startswith("#"):
                    continue
                stripped = line.strip()
                # The pipeline should not crash (or record bogus entries)
                # if the user adds blank lines to the project file.
                if not stripped:
                    continue
                info = stripped.split("\t")
                settings[info[0]] = info[1:]
        projects[projectID] = settings

    # Read the config file.
    (working_dir, available_tools, account, exclude, modules,
     recursive) = readConfigFile.readConfigFile(programDirectory)
    path_to_bam = ""
    default_working_dir = working_dir
    for project in projects:
        # Initiate the project parameters based on the project dictionary.
        settings = projects[project]
        project_path = settings["bam"]
        projectName = project

        # Set the output, genmod and frequency db paths. ``.get`` makes a
        # missing entry behave like an empty one instead of raising KeyError.
        if settings.get("output"):
            working_dir = settings["output"][0]
        else:
            working_dir = default_working_dir

        if settings.get("genmod"):
            genmod_file = settings["genmod"][0]
        else:
            genmod_file = os.path.join(programDirectory, "genmod")

        if settings.get("db"):
            frequency_db = settings["db"][0]
        else:
            frequency_db = os.path.join(working_dir, project, "FindSV",
                                        "database")

        # Create a directory to keep track of the analysed files.
        processFilesPath = os.path.join(working_dir, project, "process")
        if not os.path.exists(processFilesPath):
            os.makedirs(processFilesPath)

        # Initiate the processFiles.
        processFiles = initiateProcessFile(available_tools, processFilesPath)

        # Search for the project's bam files.
        bamfiles = detect_bam_files(project_path, projectName, path_to_bam,
                                    recursive)

        # Function used to find variants.
        processFiles = calling.variantCalling(
            programDirectory, project_path, projectName, working_dir,
            path_to_bam, available_tools, account, modules, bamfiles, exclude,
            processFiles, processFilesPath)

        # Combine the results of the variant calling.
        processFiles = combine.combine(programDirectory, processFiles,
                                       processFilesPath, account, bamfiles)

        # Function used to build databases from vcf files.
        processFiles = database.buildDatabase(programDirectory, processFiles,
                                              processFilesPath, account)

        # Function that filters the variant files and finds genomic features
        # of the variants.
        processFiles = filter.applyFilter(programDirectory, processFiles,
                                          processFilesPath, account,
                                          frequency_db)

        # Function used to annotate the samples.
        processFiles = annotation.annotation(programDirectory, processFiles,
                                             processFilesPath, account,
                                             genmod_file)

        # Function used for cleaning the vcf file; this is the final step of
        # the pipeline.
        processFiles = cleaning.cleaning(programDirectory, processFiles,
                                         processFilesPath, account)