Example #1
0
def alignment_blat_oldMaster(FASTA, reference, fileName, outDir):
    '''
    Align a set of sequences into a reference with blat (fixed set of blat options)

    blat is executed through the shell and its standard error is written to
    '<outDir>/align.err'.

    Input:
        1. FASTA: Path to FASTA file with sequences to align
        2. reference: Path to the reference genome in fasta format
        3. fileName: output file will be named accordingly ('<fileName>.psl')
        4. outDir: Output directory. It must already exist, as the error log is opened inside it

    Output:
        1. PSL: Path to PSL file containing input sequences alignments. The path is returned
                even if blat fails; a failure is only reported through log.step
    '''
    ## Align the sequences into the reference
    PSL = outDir + '/' + fileName + '.psl'
    command = 'blat -stepSize=5 -repMatch=2253 -minScore=20 -minIdentity=0 -noHead -out=psl ' + reference + ' ' + FASTA + ' ' + PSL

    # The context manager closes the error log as soon as blat finishes
    with open(outDir + '/align.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'ALIGN'
        msg = 'Alignment failed'
        log.step(step, msg)

    return PSL
Example #2
0
def alignment_minimap2(FASTA, index, fileName, processes, outDir):
    '''
    Align a set of sequences into a reference with minimap2

    minimap2 is executed through the shell: its standard output is redirected to the
    PAF file and its standard error is written to '<outDir>/align.err'.

    Input:
        1. FASTA: Path to FASTA file with sequences to align
        2. index: Path to the index of the reference in .mmi format (generated with minimap2)
        3. fileName: output file will be named accordingly ('<fileName>.paf')
        4. processes: Number of processes used by minimap2 (passed as -t)
        5. outDir: Output directory. It must already exist, as the error log is opened inside it

    Output:
        1. PAF: Path to PAF file containing input sequences alignments. The path is returned
                even if minimap2 fails; a failure is only reported through log.step
    '''
    ## Align the sequences into the reference
    # Note, consider using -Y to get soft clippings for supplementary alignments
    PAF = outDir + '/' + fileName + '.paf'
    command = 'minimap2 -t ' + str(processes) + ' ' + index + ' ' + FASTA + ' > ' + PAF

    # The context manager closes the error log as soon as minimap2 finishes
    with open(outDir + '/align.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'ALIGN'
        msg = 'Local alignment failed'
        log.step(step, msg)

    return PAF
Example #3
0
def alignment_bwa(FASTA, reference, fileName, processes, outDir):
    '''
    Align a set of sequences into a reference with bwa mem

    bwa mem is executed through the shell with -Y: its standard output is redirected to
    the SAM file and its standard error is written to '<outDir>/align.err'.

    Input:
        1. FASTA: Path to FASTA file with sequences to align
        2. reference: Path to the reference genome in fasta format (bwa mem index must be located in the same folder)
        3. fileName: output file will be named accordingly ('<fileName>.sam')
        4. processes: Number of processes used by bwa mem (passed as -t)
        5. outDir: Output directory. It must already exist, as the error log is opened inside it

    Output:
        1. SAM: Path to SAM file containing input sequences alignments. The path is returned
                even if bwa fails; a failure is only reported through log.step
    '''
    ## Align the sequences into the reference
    SAM = outDir + '/' + fileName + '.sam'
    command = 'bwa mem -Y -t ' + str(processes) + ' ' + reference + ' ' + FASTA + ' > ' + SAM

    # The context manager closes the error log as soon as bwa finishes
    with open(outDir + '/align.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'ALIGN'
        msg = 'Alignment failed'
        log.step(step, msg)

    return SAM
Example #4
0
def komplexityFilter(komplexityThreshold, inFasta, outFasta, outDir):
    '''
    Filter fasta file using komplexity tool (kz)

    kz is executed through the shell, reading the input FASTA from standard input and
    writing the filtered FASTA to standard output. Its standard error is written to
    '<outDir>/Logs/komplexity.err'.

    Input:
        1. komplexityThreshold: Complexity threshold filter.
        2. inFasta: input FASTA file name
        3. outFasta: output FASTA file name
        4. outDir: input AND output directory (it must be the same)

    Output:
        1. allFastas: Filtered FASTA file complete path. The path is returned even if kz
                      fails; a failure is only reported through log.step
    '''

    # Set input and output files
    allFastas_all = outDir + '/' + inFasta
    allFastas = outDir + '/' + outFasta

    logDir = outDir + '/Logs'
    unix.mkdir(logDir)

    command = 'kz --filter --threshold ' + str(komplexityThreshold) + ' --fasta < ' + allFastas_all + ' > ' + allFastas

    # The context manager closes the error log as soon as kz finishes
    with open(logDir + '/komplexity.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'KOMPLEXITY'
        msg = 'Komplexity filter failed. PID: ' + str(os.getpid())
        log.step(step, msg)

    return allFastas
Example #5
0
def BAM2BED(BAM, outDir):
    '''
    Convert BAM file into BED using bedtools and prepend a header line

    bedtools is executed through the shell (bamtobed -split) and its standard error is
    written to '<outDir>/Logs/BAM2BED.err'.

    Input:
        1. BAM: Path to BAM file
        2. outDir: Output directory

    Output:
        1. BED: Path to BED file ('<outDir>/alignments.bed'). A conversion failure is only
                reported through log.step; the header is still added afterwards
    '''
    ## 0. Create logs directory
    logDir = outDir + '/Logs'
    unix.mkdir(logDir)

    ## 1. Convert BAM into BED
    BED_path = outDir + '/alignments.bed'
    command = 'bedtools bamtobed -split -i ' + BAM + ' > ' + BED_path

    # The context manager closes the error log as soon as bedtools finishes
    with open(logDir + '/BAM2BED.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'BAM2BED'
        msg = 'BAM to BED conversion failed'
        log.step(step, msg)

    ## 2. Add header to BED file
    # The whole file is read into memory and rewritten with the header in front
    header = "#ref \t beg \t end \t name \t score \t strand \n"
    with open(BED_path, 'r') as original:
        data = original.read()
    with open(BED_path, 'w') as modified:
        modified.write(header + data)

    return BED_path
Example #6
0
def aligmentMaxNbMatches(FASTA_file, db, PAF_file, outDir):
    '''
    Align sequences against a database with minimap2 and pick the top alignment

    minimap2 is executed through the shell: its standard output is redirected to
    PAF_file and its standard error is written to '<outDir>/identifyMate.err'.

    Input:
        1. FASTA_file: Path to FASTA file with sequences to align
        2. db: Path to the minimap2 target (reference or index) to align against
        3. PAF_file: Path of the PAF file to be written
        4. outDir: Output directory. It must already exist, as the error log is opened inside it

    Output:
        1. First element of PAFObj.sortNbMatches() (presumably the alignment with the
           highest number of matches -- confirm in formats.PAF) or None if the PAF file is empty
    '''

    # TODO (original author): un-silence this!

    # TODO (original author): append to the error log instead of overwriting it
    command = 'minimap2 ' + db + ' ' + FASTA_file + ' > ' + PAF_file

    # The context manager closes the error log as soon as minimap2 finishes
    with open(outDir + '/identifyMate.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'IDENTIFY MATE SEQ'
        msg = 'Identify mate sequence failed'
        log.step(step, msg)

    # Nothing to pick if the PAF file is empty
    if os.stat(PAF_file).st_size == 0:
        return None

    PAFObj = formats.PAF()
    PAFObj.read(PAF_file)

    # Pick the alignment with the highest number of matches
    return PAFObj.sortNbMatches()[0]
Example #7
0
def targeted_alignment_minimap2(FASTA, targetInterval, reference, outDir,
                                outFormat):
    # NOTE 2020: In 2020:
    # def targeted_alignment_minimap2(FASTA, targetInterval, reference, outDir):
    '''
    Align a set of sequences into a reference target region.

    Useful for doing local realignment of reads around SV breakpoints. Much faster than whole genome realignment

    Both external commands are executed through the shell; their standard error is
    written to '<outDir>/Logs/target.err' and '<outDir>/Logs/align.err' respectively.

    Input:
        1. FASTA: Path to FASTA file with sequences to align
        2. targetInterval: Reference genome interval where sequences will be aligned. The interval must be provided as chr:beg-end.
        3. reference: Path to the reference sequences in fasta format. An index of the reference generated with samtools faidx must be located in the same directory
        4. outDir: Output directory
        5. outFormat: BAM or SAM

    Output:
        1. Path to the alignments: the SAM file if outFormat is "SAM", otherwise the value
           returned by bamtools.SAM2BAM. 'None' if the target extraction or the alignment failed
    '''
    ## 0. Create logs directory
    logDir = outDir + '/Logs'
    unix.mkdir(logDir)

    ## 1. Extract the reference target region prior alignment
    target = outDir + '/target.fa'
    command = 'samtools faidx ' + reference + ' ' + targetInterval + ' > ' + target

    # The context manager closes the error log as soon as samtools finishes
    with open(logDir + '/target.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'TARGET'
        msg = 'Extraction of reference target region failed'
        log.step(step, msg)
        return None

    ## 2. Align the sequences into the target region
    # Use -Y to get soft clippings for supplementary alignments
    SAM = outDir + '/alignments.sam'
    command = 'minimap2 -Y -a ' + target + ' ' + FASTA + ' > ' + SAM

    with open(logDir + '/align.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'ALIGN'
        msg = 'Local alignment failed'
        log.step(step, msg)
        return None

    # NOTE 2020: In 2020 these 2 lines were deleted:
    # The cleanup in step 4 is not reached here, so the target fasta stays in outDir
    if outFormat == "SAM":
        return SAM

    ## 3. Convert SAM to sorted BAM
    BAM = bamtools.SAM2BAM(SAM, outDir)

    ## 4. Do cleanup
    unix.rm([target, SAM])

    return BAM
Example #8
0
def index_bwa(fastaPath, outDir):
    '''
    Wrapper to generate BWA index for fasta file

    'bwa index' is executed through the shell and its standard error is written to
    '<outDir>/index.err'. A failure is only reported through log.step.

    Input:
        1. fastaPath: Path to the fasta file to be indexed
        2. outDir: Directory where the error log is written. It must already exist
    '''
    command = 'bwa index ' + fastaPath

    # The context manager closes the error log as soon as bwa finishes
    with open(outDir + '/index.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'INDEX'
        msg = 'BWA indexing failed'
        log.step(step, msg)
Example #9
0
    def run(self):
        """Run the "up" action: parse dependencies, diff them, sync, then report."""
        log.print_action_header("up")

        # Read all dependencies and validate
        log.step("Parsing dependencies")
        parsed_deps = self.__parse_dependencies()

        # Work out which dependencies differ from the current state
        log.step("Diffing dependencies to current")
        deps_to_sync = self.__diff_to_current(parsed_deps)

        # NOTE(review): nothing below ever adds to this list, so the final report
        # always receives an empty list -- confirm whether that is intended.
        results = []

        # Sync only when there is something to sync
        pending = len(deps_to_sync)
        if pending > 0:
            log.step("Syncing {count} dependencies".format(count=pending))
            for dep in deps_to_sync:
                self.download_and_apply(dep)

        # Output results
        log.step("Collating results")
        print("")
        self.__output_results(results)
Example #10
0
def index_minimap2(fastaPath, fileName, outDir):
    '''
    Wrapper to generate minimap2 index for fasta file

    minimap2 is executed through the shell (-k 11 -w 1 -d) and its standard error is
    written to '<outDir>/index.err'.

    Input:
        1. fastaPath: Path to the fasta file to be indexed
        2. fileName: index file will be named accordingly ('<fileName>.mmi')
        3. outDir: Output directory. It must already exist, as the error log is opened inside it

    Output:
        1. indexPath: Path to the index file. The path is returned even if minimap2 fails;
                      a failure is only reported through log.step
    '''
    indexPath = outDir + '/' + fileName + '.mmi'
    command = 'minimap2 -k 11 -w 1 -d ' + indexPath + ' ' + fastaPath

    # The context manager closes the error log as soon as minimap2 finishes
    with open(outDir + '/index.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'INDEX'
        msg = 'minimap2 indexing failed'
        log.step(step, msg)

    return indexPath
Example #11
0
def getPAFAlign(FASTA_file, indexDb, outDir):
    '''
    Align the consensus fasta with minimap2 and return the path to the PAF file

    minimap2 is executed through the shell: its standard output is redirected to the
    PAF file and its standard error is written to '<outDir>/minimap2.err'.

    Input:
        1. FASTA_file: Path to FASTA file with sequences to align
        2. indexDb: Path to the minimap2 target (reference or index) to align against
        3. outDir: Directory where the error log is written. It must already exist

    Output:
        1. PAF_file: Path to PAF file, written next to the input FASTA
                     ('<name>.fa' -> '<name>_alignments.paf'). The path is returned even if
                     minimap2 fails; a failure is only reported through log.step
    '''
    # Derive the output name from the input name
    extension = '.fa'

    if FASTA_file.endswith(extension):
        # Replace only the trailing extension, so a '.fa' elsewhere in the path
        # (e.g. in a directory name) is left untouched
        PAF_file = FASTA_file[:-len(extension)] + '_alignments.paf'

    else:
        # Previous naming kept for inputs not ending in '.fa'
        PAF_file = FASTA_file.replace(extension, '_alignments.paf')

        # Without any '.fa' the name would be unchanged and the shell redirection
        # would truncate the input file; use a distinct output name instead
        if PAF_file == FASTA_file:
            PAF_file = FASTA_file + '_alignments.paf'

    command = 'minimap2 ' + indexDb + ' ' + FASTA_file + ' > ' + PAF_file

    # The context manager closes the error log as soon as minimap2 finishes
    with open(outDir + '/minimap2.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'ALIGN-INSERT'
        msg = 'minimap2 alignment failed'
        log.step(step, msg)

    return PAF_file
Example #12
0
def rm(paths):
    '''
    Delete set of files/directories. Directories are deleted recursively

    Paths that do not exist are skipped. A failed deletion is reported through
    log.step and the remaining paths are still processed.

    Input:
        1. paths: list containing file/directory paths to be deleted. A single path
                  given as a string is treated as a one-element list
    '''
    # A bare string would otherwise be iterated character by character
    if isinstance(paths, str):
        paths = [paths]

    for path in paths:

        # Only attempt to delete file if it does exist
        if not os.path.exists(path):
            continue

        try:
            status = subprocess.call(['rm', '-r', path])

        except OSError:
            # Raised when the 'rm' executable itself cannot be launched
            status = None

        # 'rm' signals a failed deletion through a non-zero exit status
        if status != 0:
            step = 'ERROR'
            msg = "Deletion of the file %s failed" % path
            log.step(step, msg)
Example #13
0
def mkdir(path):
    '''
    Create directory (including missing parent directories, through 'mkdir -p')

    Nothing is done if the directory already exists. A failed creation is reported
    through log.step.

    Note: improve function to be able to create lists of directories

    Input:
        1. path: directory to be created
    '''
    # Only attempt to create directory if it does not exist
    if os.path.isdir(path):
        return

    try:
        status = subprocess.call(['mkdir', '-p', path])

    except OSError:
        # Raised when the 'mkdir' executable itself cannot be launched
        status = None

    # 'mkdir' signals a failed creation through a non-zero exit status
    if status != 0:
        step = 'ERROR'
        msg = "Creation of the directory %s failed" % path
        log.step(step, msg)
Example #14
0
def alignment_blat(FASTA, reference, args, fileName, outDir):
    '''
    Align a set of sequences into a reference with blat

    blat is executed through the shell and its standard error is written to
    '<outDir>/align.err'.

    Input:
        1. FASTA: Path to FASTA file with sequences to align
        2. reference: Path to the reference genome in fasta format
        3. args: dictionary containing blat arguments. Only the 'stepSize' and 'tileSize'
                 keys are used; any other key is ignored
        4. fileName: output file will be named accordingly ('<fileName>.psl')
        5. outDir: Output directory. It must already exist, as the error log is opened inside it

    Output:
        1. PSL: Path to PSL file containing input sequences alignments. The path is returned
                even if blat fails; a failure is only reported through log.step
    '''
    ## Align the sequences into the reference
    PSL = outDir + '/' + fileName + '.psl'

    # Set blat arguments (optional ones first, in a fixed order)
    blatArgs = ['-' + option + '=' + str(args[option])
                for option in ('stepSize', 'tileSize') if option in args]

    command = 'blat ' + ' '.join(blatArgs) + ' -repMatch=2253 -minScore=20 -minIdentity=0 -noHead -out=psl ' + reference + ' ' + FASTA + ' ' + PSL

    # The context manager closes the error log as soon as blat finishes
    with open(outDir + '/align.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'ALIGN'
        msg = 'Alignment failed'
        log.step(step, msg)

    return PSL
Example #15
0
def create_targeted_fasta(targetIntervalList, reference, outDir):
    '''
    Extract regions of interest from a fasta file.

    The intervals are written one per line to '<outDir>/targetRegions.txt', which is
    then handed to 'samtools faidx -r'. samtools is executed through the shell and its
    standard error is written to '<outDir>/Logs/target.err'.

    Input:
        1. targetIntervalList: Reference genome list of intervals to be extracted. The intervals must be provided as chr:beg-end.
        2. reference: Path to fasta file. An index of the reference generated with samtools faidx must be located in the same directory
        3. outDir: Output directory

    Output:
        1. target: Path to fasta file with sequences extracted from intervals or 'None' if the extraction failed
    '''
    ## 0. Create logs directory
    logDir = outDir + '/Logs'
    unix.mkdir(logDir)

    ## 1. Extract the reference target regions
    target = outDir + '/targetRegions.fa'
    targetRegionsPath = outDir + '/targetRegions.txt'

    # Write one interval per line; the file is closed even if a write fails
    with open(targetRegionsPath, 'w') as targetRegions:
        for targetInterval in targetIntervalList:
            targetRegions.write(targetInterval + '\n')

    command = 'samtools faidx ' + reference + ' -r ' + targetRegionsPath + ' -o ' + target

    # The context manager closes the error log as soon as samtools finishes
    with open(logDir + '/target.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'TARGET'
        msg = 'Extraction of reference target region failed'
        log.step(step, msg)
        return None

    # TODO: remove targetRegionsPath file

    return target
Example #16
0
def SAM2BAM(SAM, outDir):
    '''
    Convert SAM file into sorted BAM and make BAM index

    The three samtools commands (view, sort, index) are executed through the shell, each
    one with its own error log under '<outDir>/Logs'. A failing step is only reported
    through log.step; the following steps are still attempted.

    Input:
        1. SAM: File containing alignments in SAM format
        2. outDir: Output directory

    Output:
        1. BAM_sorted: Sorted and indexed BAM file. BAM index located in the same directory with the extension '.bai'
    '''
    ## 0. Create logs directory
    logDir = outDir + '/Logs'
    unix.mkdir(logDir)

    ## 1. Convert SAM into BAM
    BAM = outDir + '/alignments.bam'
    command = 'samtools view -Sb ' + SAM + ' > ' + BAM

    # Each context manager closes its error log as soon as the command finishes
    with open(logDir + '/SAM2BAM.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'SAM2BAM'
        msg = 'SAM to BAM conversion failed'
        log.step(step, msg)

    ## 2. Sort bam
    BAM_sorted = outDir + '/alignments.sorted.bam'
    command = 'samtools sort ' + BAM + ' > ' + BAM_sorted

    with open(logDir + '/sort.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'SORT'
        msg = 'BAM sorting failed'
        log.step(step, msg)

    ## 3. Index bam
    # NOTE(review): the command redirects standard output to the '.bai' path; confirm
    # whether passing the index path as an argument to samtools index was intended
    BAM_index = outDir + '/alignments.sorted.bam.bai'
    command = 'samtools index ' + BAM_sorted + ' > ' + BAM_index

    with open(logDir + '/index.err', 'w') as err:
        status = subprocess.call(command, stderr=err, shell=True)

    if status != 0:
        step = 'INDEX'
        msg = 'BAM indexing failed'
        log.step(step, msg)

    return BAM_sorted
# Create the SSH key when none is available yet. ssh_key, token, key_name and
# public_key are expected to be defined earlier in the script (not shown here).
if ssh_key is None:
  ssh_key            = digitalocean.SSHKey(token=token)
  ssh_key.name       = key_name
  ssh_key.public_key = public_key
  ssh_key.create()

# Droplet name: the value looked up for BOX_NAME is used, with the value looked up
# for AZK_MID (itself defaulting to 'azk-deploy') passed as its default.
# NOTE(review): presumably env(name, default) reads an environment variable and
# returns the default when it is unset -- confirm against the env helper.
droplet_name   = env('AZK_MID', 'azk-deploy')
droplet_name   = env('BOX_NAME', droplet_name)

# Remaining droplet settings, each with a hard-coded default
droplet_region = env('BOX_REGION', 'nyc3')
droplet_image  = env('BOX_IMAGE', 'ubuntu-14-04-x64')
droplet_size   = env('BOX_SIZE', '1gb')
# These flags are True only for the exact string 'true'; any other value gives None (not False)
droplet_backup = True if env('BOX_BACKUP', 'false') == 'true' else None
droplet_private_networking = True if env('BOX_PRIVATE_NETWORKING', 'false') == 'true' else None

log.step('Logging into DigitalOcean')
manager = digitalocean.Manager(token=token)
log.step_done()

log.step('Getting existing droplets')
droplets = manager.get_all_droplets()
log.step_done()

# Pick the first droplet whose name matches droplet_name; droplet stays None when
# there is no match
droplet = None
for a_droplet in droplets:
  if a_droplet.name == droplet_name:
    droplet = a_droplet
    break

if not droplet is None and (
  droplet.region['slug']     != droplet_region or
Example #18
0
    ssh_key = digitalocean.SSHKey(token=token)
    ssh_key.name = key_name
    ssh_key.public_key = public_key
    ssh_key.create()

# Droplet name: the value looked up for BOX_NAME is used, with the value looked up
# for AZK_MID (itself defaulting to 'azk-deploy') passed as its default.
# NOTE(review): presumably env(name, default) reads an environment variable and
# returns the default when it is unset -- confirm against the env helper.
droplet_name = env('AZK_MID', 'azk-deploy')
droplet_name = env('BOX_NAME', droplet_name)

# Remaining droplet settings, each with a hard-coded default
droplet_region = env('BOX_REGION', 'nyc3')
droplet_image = env('BOX_IMAGE', 'ubuntu-14-04-x64')
droplet_size = env('BOX_SIZE', '1gb')
# These flags are True only for the exact string 'true'; any other value gives None (not False)
droplet_backup = True if env('BOX_BACKUP', 'false') == 'true' else None
droplet_private_networking = True if env('BOX_PRIVATE_NETWORKING',
                                         'false') == 'true' else None

log.step('Logging into DigitalOcean')
manager = digitalocean.Manager(token=token)
log.step_done()

log.step('Getting existing droplets')
droplets = manager.get_all_droplets()
log.step_done()

# Pick the first droplet whose name matches droplet_name; droplet stays None when
# there is no match
droplet = None
for a_droplet in droplets:
    if a_droplet.name == droplet_name:
        droplet = a_droplet
        break

if not droplet is None and (
        droplet.region['slug'] != droplet_region