Beispiel #1
0
def main():
    """Read k-mers from a file and print the contigs generated from them.

    The input path is taken from ``args.file`` as returned by
    ``util.create_parser``; when it is not set, ``real_dataset.txt`` inside
    ``DATASET_DIR`` is used. Every non-blank line of the file is treated as
    one k-mer. The sequence returned by ``generateContigs`` is printed on a
    single line, separated by spaces.
    """
    util = w2lib.Week2Library()
    description = '''\
        Generate the contigs from a collection of reads (with imperfect coverage).

        Input File format :
        ---------------------------------------
        ATG
        ATG
        TGT
        TGG
        CAT
        GGA
        GAT
        AGA
        
        Expected output :
        ---------------------------------------
        AGA ATG ATG CAT GAT TGGA TGT
        '''
    args = util.create_parser(__file__, description)

    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path

    # Read from dataset file, one k-mer per line. Blank lines (for example
    # extra newlines at the end of the file) are skipped so that no empty
    # k-mer is handed to generateContigs.
    with open(args.file, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        kmers = [kmer for kmer in stripped_lines if kmer]

    result = generateContigs(kmers)
    print(" ".join(result))
Beispiel #2
0
def main():
    """Read k and a list of k-mers from a file and print the rebuilt string.

    The input path is taken from ``args.file`` as returned by
    ``util.create_parser``; when it is not set, ``real_dataset.txt`` inside
    ``DATASET_DIR`` is used. The first line of the file holds k, every
    following non-blank line holds one k-mer. The value returned by
    ``stringReconstruction`` is printed.
    """
    util = w2lib.Week2Library()
    # The first sentence used to describe an adjacency-list / Eulerian-path
    # tool (copy-paste leftover); it now matches the k + k-mers input below.
    description = '''\
        Given k and a collection of k-mers, it will reconstruct and output the string.

        Input File format :
        ---------------------------------------
        4
        CTTA
        ACCA
        TACC
        GGCT
        GCTT
        TTAC
        
        Expected output :
        ---------------------------------------
        GGCTTACCA
        '''
    args = util.create_parser(__file__, description)

    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path

    # Read from dataset file: first line is k, the rest are the k-mers.
    # Blank lines are skipped so that no empty k-mer reaches the algorithm.
    with open(args.file, 'r') as f:
        k = f.readline()
        stripped_lines = (line.strip() for line in f)
        kmers = [kmer for kmer in stripped_lines if kmer]

    # int() tolerates the trailing newline left on k by readline().
    # The file is already closed here; only the parsed data is needed.
    genome = stringReconstruction(int(k), kmers)
    print(genome)
Beispiel #3
0
def main():
    """Print the universal circular string for the k stored in the input file.

    Only the first line of the file is read; it is stripped, converted to an
    integer and handed to ``universalCircularString``. When ``args.file`` is
    not set, the path falls back to ``real_dataset.txt`` under
    ``DATASET_DIR``.
    """
    description = '''\
        Given a kmer length, it will create all binary combinations with that length.
        (ex. k=3, 000, 001, 010, 011, 100, 101, 110, 111)
        It then takes the kmer, and construct a cycle that covers all of kmers to
        construct the universal circular string.

        Input File format :
        ---------------------------------------
        4
        
        Expected output :
        ---------------------------------------
        0000110010111101
        '''
    parsed_args = w2lib.Week2Library().create_parser(__file__, description)

    fallback_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Use the bundled dataset when no input file was supplied
    if not parsed_args.file:
        parsed_args.file = fallback_path

    # Only the first line of the dataset is consumed
    with open(parsed_args.file, 'r') as dataset:
        first_line = dataset.readline()

    kmer_length = int(first_line.strip())
    print(universalCircularString(kmer_length))
def main():
    """Read an adjacency list from a file and print its non-branching paths.

    The input path is taken from ``args.file`` as returned by
    ``util.create_parser``; when it is not set, ``real_dataset.txt`` inside
    ``DATASET_DIR`` is used. Each non-blank line has the form
    ``node -> n1,n2,...``. Every path returned by ``maximalNonBranchingPaths``
    is printed on its own line with its nodes joined by `` -> ``.
    """
    util = w2lib.Week2Library()
    # The first sentence used to say "Eulerian path" (copy-paste leftover);
    # it now matches the function that is actually called.
    description = '''\
        Given adjacency list, it will find and output all maximal non-branching paths.

        Input File format :
        ---------------------------------------
        1 -> 2
        2 -> 3
        3 -> 4,5
        6 -> 7
        7 -> 6
        
        Expected output :
        ---------------------------------------
        1 -> 2 -> 3
        3 -> 4
        3 -> 5
        7 -> 6 -> 7
        '''
    args = util.create_parser(__file__, description)

    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path

    # Read from dataset file
    with open(args.file, 'r') as f:
        contents = f.readlines()

    # Format into adjacency list
    adj_dict = {}
    for content in contents:
        # Blank lines (e.g. at the end of the file) carry no edge and would
        # otherwise raise IndexError on row[1].
        if not content.strip():
            continue
        row = content.split("->")
        left_node = row[0].strip()
        # Strip every neighbour so that "4, 5" and "4,5" yield the same nodes.
        right_nodes = [node.strip() for node in row[1].split(",")]
        adj_dict[left_node] = right_nodes

    paths = maximalNonBranchingPaths(adj_dict)
    for path in paths:
        print(" -> ".join(path))
def main():
    """Read an adjacency list from a file and print its Eulerian cycle.

    The input path is taken from ``args.file`` as returned by
    ``util.create_parser``; when it is not set, ``real_dataset.txt`` inside
    ``DATASET_DIR`` is used. Each non-blank line has the form
    ``node -> n1,n2,...``; all spaces are removed before parsing. The sequence
    returned by ``eulerCycle`` is printed with its nodes joined by ``->``.
    """
    util = w2lib.Week2Library()
    description = '''\
        Given adjacency list, it will reconstruct and output the Eulerian cycle.

        Input File format :
        ---------------------------------------
        0 -> 3
        1 -> 0
        2 -> 1,6
        3 -> 2
        4 -> 2
        5 -> 4
        6 -> 5,8
        7 -> 9
        8 -> 7
        9 -> 6

        Expected output :
        ---------------------------------------
        6->8->7->9->6->5->4->2->1->0->3->2->6
        '''
    args = util.create_parser(__file__, description)

    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path

    adjacency_dict = {}

    # Read from dataset file
    with open(args.file, 'r') as f:
        rows = f.readlines()

    for row in rows:
        compact = row.replace(" ", "").strip()
        # Blank lines (e.g. at the end of the file) carry no edge and would
        # otherwise raise IndexError on parts[1].
        if not compact:
            continue
        parts = compact.split("->")
        # str.split already returns a fresh list; no extra copy is needed.
        adjacency_dict[parts[0]] = parts[1].split(",")

    cycle = eulerCycle(adjacency_dict)
    print("->".join(cycle))
Beispiel #6
0
def main():
    """Read k, d and paired reads from a file and print the spelled string.

    The input path is taken from ``args.file`` as returned by
    ``util.create_parser``; when it is not set, ``real_dataset.txt`` inside
    ``DATASET_DIR`` is used. The first line holds ``k`` and ``d`` separated by
    whitespace; every following non-blank line holds one ``read1|read2`` pair.
    The pairs are passed to ``stringSpelledByGappedPatterns`` as two-element
    lists and the returned value is printed.
    """
    util = w2lib.Week2Library()
    description = '''\
        Given k-length, distance of the paired reads, and the paired reads itself,
        it will try to align the paired reads to reconstruct the full string. 

        Input File format :
        ---------------------------------------
        4 2
        GACC|GCGC
        ACCG|CGCC
        CCGA|GCCG
        CGAG|CCGG
        GAGC|CGGA
        
        Expected output :
        ---------------------------------------
        GACCGAGCGCCGGA
        '''
    args = util.create_parser(__file__, description)

    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path

    # Read from dataset file
    with open(args.file, 'r') as f:
        # split() without a separator accepts any run of whitespace, so a
        # header such as "4  2" or "4\t2" no longer breaks the unpacking.
        k, d = f.readline().split()
        contents = f.readlines()

    paired_reads = []
    for line in contents:
        # Blank lines (e.g. at the end of the file) hold no pair and would
        # otherwise raise IndexError on reads[1].
        if not line.strip():
            continue
        reads = line.split("|")
        paired_reads.append([reads[0].strip(), reads[1].strip()])

    text = stringSpelledByGappedPatterns(int(k), int(d), paired_reads)
    print(text)
Beispiel #7
0
def main():
    """Read k, d and paired reads from a file and print the rebuilt string.

    The input path is taken from ``args.file`` as returned by
    ``util.create_parser``; when it is not set, ``real_dataset.txt`` inside
    ``DATASET_DIR`` is used. The first line holds ``k`` and ``d`` separated by
    whitespace; every following non-blank line holds one ``read1|read2`` pair,
    which is passed on unsplit (as a stripped string) to
    ``stringReconstructionFromReadPairs``. The returned value is printed.
    """
    util = w2lib.Week2Library()
    # The first sentence used to describe an adjacency-list / Eulerian-path
    # tool (copy-paste leftover); it now matches the paired-read input below.
    description = '''\
        Given k-length, distance of the paired reads, and the paired reads itself,
        it will reconstruct and output the full string.

        Input File format :
        ---------------------------------------
        4 2
        GAGA|TTGA
        TCGT|GATG
        CGTG|ATGT
        TGGT|TGAG
        GTGA|TGTT
        GTGG|GTGA
        TGAG|GTTG
        GGTC|GAGA
        GTCG|AGAT
        
        Expected output :
        ---------------------------------------
        GTGGTCGTGAGATGTTGA
        '''
    args = util.create_parser(__file__, description)

    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)

    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path

    # Read from dataset file
    with open(args.file, 'r') as f:
        # split() without a separator accepts any run of whitespace, so a
        # header such as "4  2" or "4\t2" no longer breaks the unpacking.
        k, d = f.readline().split()
        contents = f.readlines()

    # Blank lines are dropped so that no empty read pair reaches the algorithm.
    stripped_lines = (line.strip() for line in contents)
    paired_reads = [pair for pair in stripped_lines if pair]

    text = stringReconstructionFromReadPairs(int(k), int(d), paired_reads)
    print(text)