Esempio n. 1
0
def column_parser(lines):
    """Parse column-format structure records.

    lines -- iterable of strings.

    A line starting with '; ------' opens a structure section and a line
    starting with '; ******' closes it.  Inside a section every line is
    split on whitespace: column 1 is appended to the sequence, and when
    column 4 is not '.', the tuple (int(column 3), int(column 4)) is stored
    as a pair.

    Returns a list of [sequence, pairs] entries, one per closed section.
    A section that is never closed contributes nothing to the result.
    """
    parsed = []
    residues = []
    raw_pairs = []
    in_structure = False

    for raw in lines:
        if raw.startswith('; ------'):
            # structure part begins
            in_structure = True
        elif raw.startswith('; ******'):
            # structure part ends: finalise and store the current record
            in_structure = False
            # presumably shifts one-based positions to zero-based -- verify
            # against adjust_base
            directed = Pairs(adjust_base(raw_pairs, -1)).directed()
            directed.sort()
            parsed.append([''.join(residues), directed])
            residues = []
            raw_pairs = []
        elif in_structure:
            fields = raw.split()
            partner = fields[4]
            residues.append(fields[1])
            if partner != '.':
                # (alignpos, align_bp); '.' marks an unpaired position
                raw_pairs.append((int(fields[3]), int(partner)))

    return parsed
Esempio n. 2
0
def column_parser(lines):
    """Parse records written in column format.

    lines -- iterable of strings.

    Lines between a '; ------' marker and a '; ******' marker describe one
    structure.  Each such line is whitespace-split; field 1 extends the
    sequence and, unless field 4 is '.', fields 3 and 4 are converted to
    int and recorded as a pair.

    Returns a list with one [sequence, pairs] item per structure that was
    terminated by a '; ******' line.
    """
    start_mark = '; ------'
    end_mark = '; ******'

    collected = []
    bases, pair_list, recording = '', [], False

    for text in lines:
        if text.startswith(start_mark):
            recording = True
            continue

        if text.startswith(end_mark):
            recording = False
            # adjust_base(..., -1) presumably converts to zero-based
            # numbering -- verify; directed() removes duplicates
            pair_list = Pairs(adjust_base(pair_list, -1)).directed()
            pair_list.sort()
            collected.append([bases, pair_list])
            bases, pair_list = '', []
            continue

        if not recording:
            continue

        columns = text.split()
        partner = columns[4]
        bases += columns[1]
        if partner == '.':
            # not paired: only the sequence grows
            continue
        pair_list.append((int(columns[3]), int(partner)))

    return collected
Esempio n. 3
0
def ilm_parser(lines=None, pseudo=True):
    """Ilm format parser

    lines -- iterable of strings.  Lines starting with 'Final' and lines
        of length 1 (e.g. a bare newline) are skipped.  Every other line
        is split on whitespace into at most three fields, all of which
        must be convertible to int.
    pseudo -- if True returns pairs with possible pseudoknots;
        if False the pairs are passed through opt_single_random to
        remove pseudoknots.

    Lines whose second field is 0 are not pairs and are ignored.

    Returns a one-element list holding the sorted result of
    Pairs(...).directed().
    """
    pairs = []
    for line in lines:
        if line.startswith('Final') or len(line) == 1:  # skip these lines
            continue
        # Build a real list: on Python 3 map() returns an iterator, which
        # cannot be indexed below.
        fields = [int(field) for field in line.strip('\n').split(None, 2)]
        if fields[1] == 0:
            continue  # not a pair
        pairs.append(fields)

    # presumably converts one-based positions to zero-based -- verify
    # against adjust_base
    pairs = adjust_base(pairs, -1)
    tmp = Pairs(pairs).directed()
    tmp.sort()
    if not pseudo:
        tmp = opt_single_random(tmp)
        tmp.sort()

    return [tmp]
Esempio n. 4
0
def ilm_parser(lines=None,pseudo=True):
    """Ilm format parser

    Takes lines as input and returns a list with a single entry: the
    sorted output of Pairs(...).directed().

    lines -- iterable of strings.  'Final...' lines and lines of length 1
        are skipped; every remaining line is whitespace-split into at most
        three integer fields.
    pseudo -- if True returns pairs with possible pseudoknots;
        if False opt_single_random is applied to remove pseudoknots.

    A line whose second field equals 0 does not describe a pair and is
    dropped.
    """
    pairs = []
    for line in lines:
        if line.startswith('Final') or len(line) == 1:  # skip these lines
            continue
        tokens = line.strip('\n').split(None, 2)
        # An explicit list is required: a Python 3 map object is an
        # iterator and does not support numbers[1].
        numbers = [int(token) for token in tokens]
        if numbers[1] != 0:
            pairs.append(numbers)

    # presumably shifts positions from one-based to zero-based -- verify
    # against adjust_base
    pairs = adjust_base(pairs, -1)
    tmp = Pairs(pairs).directed()
    tmp.sort()
    if not pseudo:
        tmp = opt_single_random(tmp)
        tmp.sort()

    return [tmp]
Esempio n. 5
0
def parse_residues(residue_lines, num_base, unpaired_symbol):
    """Return RnaSequence and Pairs object from residue lines.

    residue_lines -- list of lines or anything that behaves like it.
        Lines should contain:
        residue_position, residue_identity, residue_partner.
    num_base -- int, basis of the residue numbering. In bpseq files from
        the CRW website, the numbering starts at 1.
    unpaired_symbol -- string, symbol in the 'partner' column that indicates
        that a base is unpaired. In bpseq files from the CRW website, the
        unpaired_symbol is '0'. This parameter should be a string to allow
        other symbols that can't be cast to an integer to indicate
        unpaired bases.

    Positions and partners are stored with num_base subtracted; an
    unpaired residue is stored with partner None.

    Raises BpseqParseError when a line does not consist of exactly three
    whitespace-separated fields or a number cannot be converted, when a
    residue position occurs twice, or when pairs.hasConflicts() reports
    conflicting base pairs.
    """
    # create dictionary/list for sequence and structure
    seq_dict = {}
    pairs = Pairs()

    for line in residue_lines:
        # Only the parsing itself can raise ValueError (wrong number of
        # fields or a non-integer field), so only that is guarded.
        try:
            pos, res, partner = line.strip().split()
            pos = int(pos) - num_base
            if partner == unpaired_symbol:
                partner = None
            else:
                partner = int(partner) - num_base
        except ValueError:
            raise BpseqParseError("Failed to parse line: %s" % (line))

        if pos in seq_dict:
            # Report the position as written in the file: undo the
            # num_base shift instead of assuming one-based numbering.
            raise BpseqParseError(
                "Double entry for residue %s (%s in bpseq file)"
                % (str(pos), str(pos + num_base)))
        seq_dict[pos] = res
        pairs.append((pos, partner))

    # check for conflicts, remove unpaired bases
    if pairs.hasConflicts():
        raise BpseqParseError("Conflicts in the list of basepairs")
    pairs = pairs.directed()
    pairs.sort()

    # construct sequence from seq_dict
    seq = RnaSequence(construct_sequence(seq_dict))

    return seq, pairs
Esempio n. 6
0
def parse_residues(residue_lines, num_base, unpaired_symbol):
    """Return RnaSequence and Pairs object from residue lines.

    residue_lines -- list of lines or anything that behaves like it.
        Lines should contain:
        residue_position, residue_identity, residue_partner.
    num_base -- int, basis of the residue numbering. In bpseq files from
        the CRW website, the numbering starts at 1.
    unpaired_symbol -- string, symbol in the 'partner' column that indicates
        that a base is unpaired. In bpseq files from the CRW website, the
        unpaired_symbol is '0'. This parameter should be a string to allow
        other symbols that can't be cast to an integer to indicate
        unpaired bases.

    Every position (and every partner of a paired residue) is shifted by
    -num_base before being stored; unpaired residues get partner None.

    Raises BpseqParseError for a line that cannot be split into three
    fields or whose numeric fields are not integers, for a residue
    position that appears more than once, and when pairs.hasConflicts()
    is true.
    """
    # create dictionary/list for sequence and structure
    seq_dict = {}
    pairs = Pairs()

    for line in residue_lines:
        # Keep the try body limited to the statements that can raise
        # ValueError: unpacking the fields and the int() conversions.
        try:
            pos, res, partner = line.strip().split()
            pos = int(pos) - num_base
            partner = None if partner == unpaired_symbol \
                else int(partner) - num_base
        except ValueError:
            raise BpseqParseError("Failed to parse line: %s"%(line))

        if pos in seq_dict:
            # The file position is pos + num_base; a fixed "+1" would be
            # wrong for any numbering basis other than 1.
            file_pos = pos + num_base
            raise BpseqParseError(
                "Double entry for residue %s (%s in bpseq file)"
                %(str(pos), str(file_pos)))
        seq_dict[pos] = res
        pairs.append((pos,partner))

    # check for conflicts, remove unpaired bases
    if pairs.hasConflicts():
        raise BpseqParseError("Conflicts in the list of basepairs")
    pairs = pairs.directed()
    pairs.sort()

    # construct sequence from seq_dict
    seq = RnaSequence(construct_sequence(seq_dict))

    return seq, pairs
Esempio n. 7
0
def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input.

    The first line, and every later line for which new_struct(line) is
    true, is treated as a header that starts a new structure.  If the
    whitespace-split header contains 'dG' or 'ENERGY' the energy is read
    from field 3; if it contains 'Structure' the energy is read from
    field 2.  All other lines are residue lines: field 1 extends the
    sequence and a non-zero field 4 yields the pair
    (int(field 0), int(field 4)).

    Returns a list containing sequence, structure and if available the
    energy:
    [[seq1,[struct1],energy1],[seq2,[struct2],energy2],...]
    Entries without an energy have only two elements.
    """

    def _entry(seq, struct, energy):
        # Build one result entry from the raw pairs collected so far.
        # structs are one(1) based, adjust to zero based
        struct = Pairs(adjust_base(struct, -1)).directed()
        struct.sort()
        if energy is not None:
            return [seq, struct, energy]
        return [seq, struct]

    count = 0
    energy = None
    seq = ''
    struct = []
    result = []

    for line in lines:
        count += 1
        sline = line.split(None, 6)  # sline = split line
        if count == 1 or new_struct(line):  # first line or new struct line
            if count > 1:
                # Store the structure that just ended.  (The original
                # appended an undefined name here when no energy was set.)
                result.append(_entry(seq, struct, energy))
                energy = None
                struct = []
                seq = ''
            # checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = atof(sline[3])
            if 'Structure' in sline:
                energy = atof(sline[2])
        else:
            seq = ''.join([seq, sline[1]])
            if int(sline[4]) != 0:  # partner 0 means unpaired; keep pairs
                struct.append((int(sline[0]), int(sline[4])))

    # the last structure has no following header, so store it here
    result.append(_entry(seq, struct, energy))
    return result
Esempio n. 8
0
def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input.

    A header line is the first line or any line for which
    new_struct(line) is true.  When the split header contains 'dG' or
    'ENERGY', field 3 is parsed as the energy; when it contains
    'Structure', field 2 is parsed as the energy.  Every non-header line
    contributes field 1 to the sequence and, if field 4 is not 0, the
    pair (int(field 0), int(field 4)) to the structure.

    Returns a list containing sequence, structure and if available the
    energy:
    [[seq1,[struct1],energy1],[seq2,[struct2],energy2],...]
    When no energy was found the entry is just [seq, struct].
    """
    energy = None
    seq = ''
    struct = []
    result = []

    for count, line in enumerate(lines, 1):
        sline = line.split(None, 6)  # sline = split line
        if count == 1 or new_struct(line):  # first line or new struct line
            if count > 1:
                # structs are one(1) based, adjust to zero based
                struct = adjust_base(struct, -1)
                struct = Pairs(struct).directed()
                struct.sort()
                if energy is not None:
                    result.append([seq, struct, energy])
                    energy = None
                else:
                    # was `pairs`, an undefined name (NameError)
                    result.append([seq, struct])
                struct = []
                seq = ''
            # checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = atof(sline[3])
            if 'Structure' in sline:
                energy = atof(sline[2])
        else:
            seq = ''.join([seq, sline[1]])
            partner = int(sline[4])
            if partner != 0:  # 0 marks an unpaired base; record real pairs
                struct.append((int(sline[0]), partner))

    # structs are one(1) based, adjust to zero based
    struct = adjust_base(struct, -1)
    struct = Pairs(struct).directed()
    struct.sort()

    if energy is not None:
        result.append([seq, struct, energy])
    else:
        result.append([seq, struct])
    return result