Example #1
0
def trim_record(read, trim_at):
    """Build a fresh Record holding `read` cut down to `[:trim_at]`.

    The name is copied unchanged and the sequence is sliced with
    ``[:trim_at]``.  A quality value is sliced the same way, but only when
    `read` has a ``quality`` attribute.  `read` itself is only read from.
    """
    keep = slice(None, trim_at)  # equivalent to the [:trim_at] slice

    trimmed = Record()
    trimmed.name = read.name
    trimmed.sequence = read.sequence[keep]
    if not hasattr(read, 'quality'):
        return trimmed

    trimmed.quality = read.quality[keep]
    return trimmed
Example #2
0
def trim_record(read, trim_at):
    """Return a new Record containing `read` truncated at `trim_at`.

    Copies ``read.name`` as-is, keeps ``read.sequence[:trim_at]`` and, if
    `read` carries a ``quality`` attribute, keeps ``read.quality[:trim_at]``
    as well.  The input record is left untouched.
    """
    prefix = slice(None, trim_at)  # same slice as [:trim_at]

    shortened = Record()
    shortened.name = read.name
    shortened.sequence = read.sequence[prefix]

    has_quality = hasattr(read, 'quality')
    if has_quality:
        shortened.quality = read.quality[prefix]
    return shortened
Example #3
0
def trim_record(read, trim_at):
    """Utility function: copy `read` into a new Record, trimmed at `trim_at`.

    The name is carried over unchanged; the sequence (and the quality, when
    `read` has a ``quality`` attribute) is cut with ``[:trim_at]``.
    """
    keep = slice(None, trim_at)  # equivalent to the [:trim_at] slice

    trimmed = Record()
    trimmed.name = read.name
    trimmed.sequence = read.sequence[keep]
    if not hasattr(read, 'quality'):
        # No quality attribute on the input: nothing more to copy.
        return trimmed

    trimmed.quality = read.quality[keep]
    return trimmed
Example #4
0
def trim_record(read, trim_at):
    """Utility function: make a trimmed copy of `read` as a new Record.

    ``name`` is copied verbatim, ``sequence`` is reduced to
    ``sequence[:trim_at]``, and ``quality`` gets the same slice whenever the
    input record has that attribute.  The original record is not modified.
    """
    prefix = slice(None, trim_at)  # same slice as [:trim_at]

    shortened = Record()
    shortened.name = read.name
    shortened.sequence = read.sequence[prefix]

    has_quality = hasattr(read, 'quality')
    if has_quality:
        shortened.quality = read.quality[prefix]
    return shortened
Example #5
0
def test_read_type_basic():
    """Check that basic properties of khmer.Read behave like screed.Record."""
    # Name and sequence are mandatory, so a bare call must be rejected.
    with pytest.raises(TypeError):
        Read()

    name = "895:1:1:1246:14654 1:N:0:NNNNN"
    sequence = "ACGT"
    khmer_read = Read(name, sequence)
    screed_record = Record(dict(name=name, sequence=sequence))

    for obj in (khmer_read, screed_record):
        assert obj.name == name
        assert obj.sequence == sequence
        # Attributes that were never supplied must not exist on either type.
        for optional in ('quality', 'annotations'):
            assert not hasattr(obj, optional), obj
Example #6
0
 def __init__(self, input, output, ispaired, input1, input2, kmerLen,
              lossless, verbose):
     """Store the run configuration and reset all bookkeeping state.

     The constructor only assigns attributes (no input is read here) and
     finishes by calling ``self.removeOutputFile()``.
     """
     # ---- configuration passed in by the caller ----
     self.input = input
     self.input1 = input1
     self.input2 = input2
     self.paired = ispaired
     self.fileOutputPath = output
     self.lossless = lossless
     self.verbose = verbose

     # ---- k-mer settings: the bucket index length equals the k-mer length ----
     self.kmerLen = kmerLen
     self.bucketIndexLen = kmerLen
     self.maxKmer = "T" * kmerLen

     # ---- bucket tables ----
     self.mutiDict = {}  # k-mers for buckets
     self.bucketTable = {}  # bucket index and its number of reads
     # keyed by bucket; each bucket holds (seq, reverseFlag, indexPos)
     self.sequenceTable = {}
     self.sequenceTableSave = {}

     # ---- per-read scratch state ----
     self.read = dict.fromkeys(
         ('sequence', 'reverse', 'indexPos', 'N', 'len', 'order'), "")
     self.record = Record()  # read record
     self.buffer = None
     self.seqLen = 0
     self.skipZone = 0
     self.sequenceNFlag = False  # whether the sequence includes N
     self.sequenceN = []  # info about N in reads, as (readID, pos, len)

     # ---- counters ----
     self.recordNum = 0
     self.onsies = 0  # number of buckets that hold only one sequence
     self.leftOnesies = 0  # number of onesies left after re-assignment
     self.reversedNum = 0  # total number of reversed reads

     # ---- output file names, one entry per output stream ----
     self.fileoutName = dict(
         index="", cov="", rc="", N="", indexPos="", len=0, order=0)

     # Must run last: every attribute above is assigned before this call.
     self.removeOutputFile()
Example #7
0
    #ref_loc_to_build.append(ref_file_names_to_loc[os.path.splitext(name)[0]])  # since bbmap stuff changed the file extension

# This uses khmer to merge the contigs of each reference file into a single
# record and writes all merged records to one fasta file (out_fid).
# Every contig is preceded by a run of N's (including the first one), so the
# merged sequence starts with the spacer.
contig_spacer = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
for loc in ref_loc_to_build:
    print(os.path.basename(loc))
    fid = khmer.ReadParser(loc)
    # Reset per file: otherwise an input without records would either raise
    # NameError (first file) or silently reuse the previous file's header.
    header = None
    chunks = []
    try:
        for i, record in enumerate(fid):
            if i == 0:
                # The merged record is named after the first contig.
                header = record.name
            chunks.append(contig_spacer)
            chunks.append(record.sequence)
    finally:
        # Release the parser even if reading fails part-way through.
        fid.close()
    print("there")
    if not chunks:
        # Nothing was read from this file; do not emit a bogus record.
        print("no records found in " + loc + ", skipping")
        continue
    seq = "".join(chunks)  # single join instead of repeated string +=
    record = Record(name=header, sequence=seq)
    write_record(record, out_fid)
out_fid.close()

# Alternative approach: rely on bbmap to do the contig merging, then
# concatenate the fused files (the Python equivalent of `cat`).
# The fused inputs are expected to have been produced beforehand with:
# ls *.bz2 | xargs -P 4 -I{} ~/Documents/bbmap/./fuse.sh in={} out=Fused/{}
# cd Fused
# ls *.bz2 | xargs -P 4 -I{} bzip2 -d {}
#
# NOTE(review): out_fid has already been closed by the `out_fid.close()` that
# follows the khmer loop above, so the write below would raise ValueError as
# soon as a non-empty input is read -- presumably only one of the two
# sections is meant to be active at a time; confirm and reopen out_fid (or
# drop the earlier close) if both should run.

# Copy every line of every reference file, unchanged, into out_fid.
for loc in ref_file_names_to_loc.values():  #ref_loc_to_build:
    with open(loc) as infile:
        for line in infile:
            out_fid.write(line)
out_fid.close()