Esempio n. 1
0
def createReadLibrary(df, reads_file_1, reads_file_2, args):
    """Simulate read pairs for every row of ``df``.

    For each row, ``createReadPair`` is called ``ref_read_count`` times with
    ``row.seq`` and then ``alt_read_count`` times with ``row.alt_seq``.
    Before the loop, shared sampling state is stored on ``args``:
    ``read_target``, ``insert_distribution``, ``insert_distribution_min``
    and ``qual_scores``.

    :param df: DataFrame exposing ``seq``, ``alt_seq``, ``ref_read_count``
        and ``alt_read_count`` columns.
    :param reads_file_1: handed unchanged to ``createReadPair`` (presumably
        the destination for the first read of each pair -- confirm against
        that helper).
    :param reads_file_2: handed unchanged to ``createReadPair`` (presumably
        the destination for the second read of each pair -- confirm).
    :param args: namespace providing ``skew``, ``insert_mean`` and
        ``insert_sd``; it is mutated in place as described above.
    :return: None.
    """
    args.read_target = np.sum(df.ref_read_count) + np.sum(df.alt_read_count)

    # Sample more insert sizes than reads: the extra 10000 act as a buffer
    # in case of rounding errors etc.
    sampled_sizes = stats.skewnorm.rvs(
        a=args.skew,
        size=args.read_target + 10000,
        loc=args.insert_mean,
        scale=args.insert_sd,
    )
    insert_sizes = [int(size) for size in sampled_sizes]
    random.shuffle(insert_sizes)

    # Index both pools by position so RandomDict can draw from them.
    args.insert_distribution = RandomDict(list(enumerate(insert_sizes)))
    args.insert_distribution_min = min(insert_sizes)
    args.qual_scores = RandomDict(list(enumerate(readQualscores(args))))

    # The counter is threaded through createReadPair, which returns its
    # updated value after every pair.
    read_counter = 0
    for record in df.itertuples():
        # reads carrying the reference sequence
        for _ in range(record.ref_read_count):
            read_counter = createReadPair(
                record.seq, reads_file_1, reads_file_2, read_counter, args)

        # reads carrying the alternative sequence
        for _ in range(record.alt_read_count):
            read_counter = createReadPair(
                record.alt_seq, reads_file_1, reads_file_2, read_counter,
                args)
Esempio n. 2
0
 def _parse_initial_node_table(initial_node_table: bfcp_pb2.NodeTable)\
         -> Dict[bytes, bfcp_pb2.NodeTableEntry]:
     """Index the entries of a node table by their public key.

     Each entry's ``node.public_key`` is converted with ``proto_to_pubkey``
     and then ``pubkey_to_deterministic_string``; the result is used as the
     key under which the entry itself is stored.

     :param initial_node_table: table whose ``entries`` are indexed.
     :return: a ``RandomDict`` mapping the derived key to its entry.
     """
     table = RandomDict()
     for node_entry in initial_node_table.entries:
         lookup_key = pubkey_to_deterministic_string(
             proto_to_pubkey(node_entry.node.public_key))
         table[lookup_key] = node_entry
     return table
Esempio n. 3
0
def test_many_inserts_deletes():
    """Insert 10000 keys, delete them all, and expect an empty dict."""
    total = 10000
    subject = RandomDict()
    for key in range(total):
        subject[key] = 1
    for key in range(total):
        del subject[key]
    assert len(subject) == 0
Esempio n. 4
0
def test_random_value():
    """Every value drawn by ``random_value`` must be one that was stored."""
    size = 10000
    subject = RandomDict()
    for number in range(size):
        subject[number] = number

    stored_values = set(range(size))
    for _ in range(100000):
        drawn = subject.random_value()
        assert drawn in stored_values
def init_choice_set(image_point):
    """Enumerate every unordered pair of image ids in ``image_point``.

    :param image_point: indexable collection of 2-item entries whose first
        item is an image id (the second item is ignored).
    :return: a ``RandomDict`` mapping consecutive integers (starting at 0)
        to ``(image_id1, image_id2)`` tuples, where the first id always comes
        from an earlier position than the second.
    """
    choices = RandomDict()
    total = len(image_point)
    next_key = 0  # always equals the number of pairs stored so far
    for first_pos in range(total):
        for second_pos in range(first_pos + 1, total):
            first_id, _ = image_point[first_pos]
            second_id, _ = image_point[second_pos]
            choices[next_key] = (first_id, second_id)
            next_key += 1
    return choices
Esempio n. 6
0
    def test_update(self):
        """Constructor contents are kept; ``update`` adds and overwrites."""
        subject = RandomDict({'a': 1, 'b': 2})
        self.assertEqual(2, len(subject))
        for key, expected in (('a', 1), ('b', 2)):
            self.assertEqual(expected, subject[key])

        subject.update({'c': 7, 'a': 8})
        self.assertEqual(3, len(subject))
        for key, expected in (('a', 8), ('b', 2), ('c', 7)):
            self.assertEqual(expected, subject[key])
Esempio n. 7
0
    def test_del(self):
        """``del`` and ``pop`` remove entries and shrink the length."""
        subject = RandomDict()
        subject['a'] = 1
        subject['b'] = 2

        del subject['a']

        # the remaining entry is untouched
        self.assertEqual(2, subject['b'])
        self.assertEqual(1, len(subject))

        popped = subject.pop('b')
        self.assertEqual(2, popped)
        self.assertEqual(0, len(subject))
Esempio n. 8
0
def test_random_key():
    """Drain the dict via ``random_key``; every drawn key must be known."""
    import string

    alphabet = string.ascii_lowercase
    subject = RandomDict()
    for letter in alphabet:
        subject[letter] = 1
    known_keys = set(alphabet)

    while len(subject) > 0:
        drawn = subject.random_key()
        assert drawn in known_keys
        del subject[drawn]
Esempio n. 9
0
    def test_set_get(self):
        """Item assignment, overwrite, ``get`` and ``setdefault``."""
        subject = RandomDict()
        subject['a'] = 1
        subject['b'] = 2
        for key, expected in (('a', 1), ('b', 2)):
            self.assertEqual(expected, subject[key])

        # overwriting an existing key must not add a new entry
        subject['a'] = 10
        for key, expected in (('a', 10), ('b', 2)):
            self.assertEqual(expected, subject[key])
        self.assertEqual(2, len(subject))
        self.assertEqual(2, subject.get('b'))

        subject.setdefault('c', 89)
        self.assertEqual(89, subject['c'])
Esempio n. 10
0
    def __init__(self,
                 definitions,
                 use_string_pattern,
                 init_rand_values,
                 max_string_len=200):
        """Create a Replicator for a given type.

        :param definitions: all object definitions that could be referenced
            in the type that should be replicated
        :param use_string_pattern: if valid, patterns are generated instead
            of random character strings
        :param init_rand_values: whether replicated values should be random
            or, as per default, be 0 or ''
        :param max_string_len: maximum length of generated strings
        """
        # helpers used for drawing random content
        self.randomdict = RandomDict()
        self.random = Random()

        # configuration, stored exactly as supplied by the caller
        self.max_string_len = max_string_len
        self.use_string_pattern = use_string_pattern
        self.init_rand_values = init_rand_values
        self.definitions = definitions
Esempio n. 11
0
 def initializePatches(self):
     """Create one ``Patch`` per grid cell and register them all as empty.

     ``self.patch_dict[i][j]`` holds the patch at row ``i``, column ``j``.
     ``self.empty_patches`` is a ``RandomDict`` keyed by ``(i, j)`` that
     refers to the same ``Patch`` objects.

     Reads ``self.rows``, ``self.cols`` and ``self.sugarMap``.
     """
     # Start with one empty inner dict per row. A flat
     # ``{i: {j: 0} for i ... for j ...}`` comprehension keeps only the last
     # column of each row (later ``j`` values overwrite key ``i``) and
     # leaves the inner dicts in a scrambled column order.
     self.patch_dict = {i: {} for i in range(self.rows)}
     for i in range(self.rows):
         for j in range(self.cols):
             # cells with i + j >= rows carry sugar, the others water
             good = "sugar" if i + j >= self.rows else "water"
             self.patch_dict[i][j] = Patch(self, i, j, self.sugarMap[i][j],
                                           good)
     # every patch is initially registered in empty_patches
     self.empty_patches = RandomDict({(i, j): self.patch_dict[i][j]
                                      for i in range(self.rows)
                                      for j in range(self.cols)})
Esempio n. 12
0
    def initializePatches(self):
        """Create one ``Patch`` per grid cell and register them all as empty.

        ``self.patch_dict[row][col]`` holds the patch at that grid position.
        ``self.empty_patches`` is a ``RandomDict`` keyed by ``(row, col)``
        that refers to the same ``Patch`` objects.

        Reads ``self.rows``, ``self.cols`` and ``self.sugarMap``.
        """
        # Start with one empty inner dict per row. A flat
        # ``{row: {col: 0} for row ... for col ...}`` comprehension keeps
        # only the last column of each row (later ``col`` values overwrite
        # key ``row``) and leaves the inner dicts in a scrambled column order.
        self.patch_dict = {row: {} for row in range(self.rows)}
        for row in range(self.rows):
            for col in range(self.cols):
                # cells with row + col < cols carry sugar, the others water
                good = "sugar" if row + col < self.cols else "water"
                self.patch_dict[row][col] = Patch(self, row, col,
                                                  self.sugarMap[row][col],
                                                  good)

        # RandomDict is used for choosing a random empty patch.
        # NOTE(review): the earlier comment claimed O(n) for this; the
        # randomdict package is presumably chosen for constant-time random
        # selection -- confirm against its documentation.
        self.empty_patches = RandomDict({(row, col): self.patch_dict[row][col]
                                         for row in range(self.rows)
                                         for col in range(self.cols)})
Esempio n. 13
0
    def parse_jeopardy(self):
        """Load the jeopardy trivia set from ``data/trivia/jeopardy.csv``.

        Every non-empty CSV row becomes one ``TriviaLine``: all columns
        except the last are joined with newlines (and stripped) to form the
        question, and the last column is the single accepted answer.

        :return: a ``RandomDict`` mapping consecutive integers (from 0) to
            ``TriviaLine`` objects.
        :raises ValueError: if no trivia line could be parsed.
        """
        path = "data/trivia/jeopardy.csv"
        parsed_list = RandomDict()
        print("Special routine for jeopardy parsing")

        # The encoding is hard-coded for this file, not detected.
        encoding = "ISO-8859-1"
        print("encoding detected")

        with open(path, "r", encoding=encoding) as f:
            print("csv loop started")
            for row in csv.reader(f):
                if not row:
                    # csv.reader yields [] for a blank line; indexing
                    # row[-1] would abort the whole parse with IndexError.
                    continue
                # len(parsed_list) keeps the keys consecutive even when
                # blank rows are skipped.
                parsed_list[len(parsed_list)] = TriviaLine(
                    question="\n".join(row[:-1]).strip(),
                    answers=[row[-1]])

        if not parsed_list:
            raise ValueError("Empty trivia list")

        print("done")
        return parsed_list
Esempio n. 14
0
    def parse_trivia_list(self, filename):
        """Load a trivia list by name.

        ``"jeopardy"`` is delegated to ``parse_jeopardy``. Any other name is
        read from ``data/trivia/<filename>.txt``, where each usable line has
        the form ``question`answer1`answer2...``. Lines without a backtick or
        with an empty question are ignored; answers are whitespace-stripped.

        :param filename: name of the trivia list (without extension).
        :return: a ``RandomDict`` mapping consecutive integers (from 0) to
            ``TriviaLine`` objects.
        :raises ValueError: if no trivia line could be parsed.
        """
        if filename == "jeopardy":
            return self.parse_jeopardy()

        print("parsing ordinary trivia txt")
        path = "data/trivia/{}.txt".format(filename)
        parsed_list = RandomDict()

        fallback_encoding = "ISO-8859-1"
        with open(path, "rb") as f:
            try:
                # chardet reports None when it cannot decide; use the same
                # fallback as for a detection failure in that case.
                encoding = (chardet.detect(f.read())["encoding"]
                            or fallback_encoding)
            except Exception:
                # Detection is best effort only; KeyboardInterrupt and
                # SystemExit are deliberately no longer swallowed.
                encoding = fallback_encoding

        with open(path, "r", encoding=encoding) as f:
            trivia_list = f.readlines()

        for raw_line in trivia_list:
            if "`" not in raw_line:
                continue
            question, *raw_answers = raw_line.replace("\n", "").split("`")
            answers = [answer.strip() for answer in raw_answers]
            if question and answers:
                # len(parsed_list) is the next free consecutive key
                parsed_list[len(parsed_list)] = TriviaLine(
                    question=question, answers=answers)

        if not parsed_list:
            raise ValueError("Empty trivia list")

        return parsed_list
Esempio n. 15
0
def genquery(genomeFile, jellyFile, totedits, medindel, insprob, delprob,
             queryfreq, querycount, outputFile):
    """Write a random stream of genome edits and k-mer queries.

    Edit types are drawn in the proportions given by ``insprob`` and
    ``delprob`` (the remainder are SNPs) and written to ``outputFile`` as
    ``I <pos> <seq>``, ``D <start> <end>`` or ``S <pos> <seq>`` lines. After
    every ``queryfreq`` edits, ``querycount`` lines of the form
    ``Q <kmer> <flag> <count>`` are written, where the flag is ``I`` for a
    k-mer taken from the sampled edited regions and ``N`` for a k-mer taken
    from a random genome position. The count is always written as 0.

    :param genomeFile: open file whose first line is the genome; it is
        closed by this function.
    :param jellyFile: path handed to ``jellyfish.QueryMerFile``.
    :param totedits: total number of edits to make.
    :param medindel: size parameter passed to ``random_insertion`` and
        ``random_deletion`` (per the original notes: the median/mean indel
        size).
    :param insprob: probability of an insertion.
    :param delprob: probability of a deletion.
    :param queryfreq: number of edits between two bursts of queries.
    :param querycount: number of queries written per burst.
    :param outputFile: open, writable file; it is closed by this function.
    :raises ValueError: if ``insprob + delprob`` exceeds 1.0.
    """
    if delprob + insprob > 1.0:
        # Raising a plain string is itself a TypeError; use a real exception.
        raise ValueError(
            "Error, delprob = {} and insprob = {}. "
            "The sum is {} > 1.0".format(
                delprob, insprob, delprob + insprob))

    try:
        genome = genomeFile.readline()
        genomeFile.close()
        # Not queried below (the count lookup is commented out), but still
        # constructed so that a bad jellyFile keeps failing here as before.
        qf = jellyfish.QueryMerFile(jellyFile)
        # drop the final character of the line (presumably the trailing
        # newline -- confirm for inputs without one)
        numbases = len(genome) - 1
        genome = genome[0:numbases]
        snpProb = 1.0 - (insprob + delprob)
        SNPrange = int(snpProb * totedits)
        insrange = int(insprob * totedits)
        delrange = int(delprob * totedits)

        editTypes = (['S'] * SNPrange) +\
                    (['D'] * delrange) +\
                    (['I'] * insrange)

        random.shuffle(editTypes)
        qcount = 0
        effectedkmers = RandomDict()
        # only every `sample`-th edit (counted since the last query burst)
        # contributes k-mers to effectedkmers
        sample = 10
        for val in editTypes:
            qcount += 1
            if val == 'I':
                p, s, seq = random_insertion(numbases, medindel)
                numbases += s
                outputFile.write('I %d %s\n' % (p, seq))
                if ((qcount - 1) % sample) == 0:
                    add_kmers_in_seq(effectedkmers, seq)
                    add_kmers_in_seq(effectedkmers, genome[p - K + 1:p + K])

            elif val == 'D':
                p, s = random_deletion(numbases, medindel)
                numbases -= s
                outputFile.write('D %d %d\n' % (p, p + s - 1))
                #add_kmers_in_seq(effectedkmers, genome[p-K+1:p+s-1+K])

            else:
                p, seq = random_snp(numbases)
                outputFile.write('S %d %s\n' % (p, seq))

                if ((qcount - 1) % sample) == 0:
                    add_kmers_in_seq(effectedkmers,
                                     genome[p - K + 1:p + K - 1])

            # if it's time to output some queries
            if qcount == queryfreq:
                qcount = 0
                for _ in range(querycount):
                    dart = random.random()
                    if dart <= EDIT_QUERY_PROB:
                        # NOTE(review): effectedkmers may still be empty
                        # here (e.g. only deletions so far); behaviour then
                        # depends on RandomDict.random_key -- confirm.
                        kmer = effectedkmers.random_key()
                        editflag = 'I'
                    else:
                        p = random.randrange(K * 2, numbases - K * 2)
                        kmer = genome[p:p + K].upper()
                        editflag = 'N'

                    kcount = 0
                    #kcount = int(qf[jellyfish.MerDNA(kmer)])
                    outputFile.write(
                        'Q %s %s %d\n' % (kmer, editflag, kcount))
    finally:
        # close the output even when an edit helper raises part-way through
        outputFile.close()
grid_col = current_col
grid_id = current_grid

# Reuse the patch grid already saved for this grid id when its file exists;
# otherwise start from an all-zero uint8 grid of
# (grid_sz * input_sz) x (grid_sz * input_sz).
grid = (
    cv2.imread(os.path.join(output_folder, 'patches%04d.bmp' % grid_id),
               cv2.IMREAD_GRAYSCALE)
    if os.path.exists(os.path.join(output_folder, 'patches%04d.bmp' % grid_id))
    else np.zeros(shape=(grid_sz * input_sz, grid_sz * input_sz),
                  dtype=np.uint8)
)

# info.txt is opened in 'a+' mode, so earlier content is kept.
info_file = open(os.path.join(output_folder, 'info.txt'), 'a+')
#position_file = open(os.path.join(output_folder, 'position%04d.txt'%grid_id), 'w')

# bookkeeping containers, all starting out empty
nonmatches = {}
possible_n_matches = 0
matches = RandomDict()


def init_choice_set(image_point):
    """Enumerate every unordered pair of image ids in ``image_point``.

    :param image_point: indexable collection of 2-item entries whose first
        item is an image id (the second item is ignored).
    :return: a ``RandomDict`` mapping consecutive integers (starting at 0)
        to ``(image_id1, image_id2)`` tuples, where the first id always comes
        from an earlier position than the second.
    """
    choices = RandomDict()
    total = len(image_point)
    next_key = 0  # always equals the number of pairs stored so far
    for first_pos in range(total):
        for second_pos in range(first_pos + 1, total):
            first_id, _ = image_point[first_pos]
            second_id, _ = image_point[second_pos]
            choices[next_key] = (first_id, second_id)
            next_key += 1
    return choices

Esempio n. 17
0
def test_delete():
    """Deleting the only key leaves an empty dict."""
    single = RandomDict({'a': 1})
    del single['a']
    assert len(single) == 0
    for cell_barcode in cell_pool:
        whitelist_file.write('{}\n'.format(cell_barcode))

#Generate some mutants
# Ten mutants are requested from the current cell pool at a base mutation
# rate of 0.001.
mutants = generate_mutants(cell_pool=cell_pool,
                           base_mutation_rate=0.001,
                           number_of_mutants=10)

print('Writing sim data to file.')
count = 1
# Both FASTQ outputs (<Outputname>_R1.fastq / _R2.fastq) are opened for
# writing and stay open for the whole simulation loop.
with open(args.Outputname + "_R1.fastq",
          "w") as handleR1, open(args.Outputname + "_R2.fastq",
                                 "w") as handleR2:
    for myfile in args.T:
        # Index the FASTA file and wrap the index in a RandomDict so that
        # records can be drawn with random_value().
        record_dict = SeqIO.index(myfile.name, "fasta")
        record_rand = RandomDict(record_dict)
        for cell in range(0, nrCells):
            # Draw one barcode and remove it from the pool so that it is not
            # drawn again for a later cell.
            # NOTE(review): `discard` suggests cell_pool is a set; Python
            # 3.11+ rejects sets in random.sample -- confirm the type.
            cellSeq = random.sample(cell_pool, 1)[0]
            cell_pool.discard(cellSeq)
            umi_set = generate_umi_set(args.umi_bc_length, 100, 0.2)
            for record in range(0, nrSeqs):
                # NOTE(review): same random.sample concern if umi_set is a
                # set -- confirm.
                UMIseq = random.sample(umi_set, 1)[0]
                myItem = record_rand.random_value()
                # With probability 0.01 the drawn record's sequence is
                # replaced by a mutated copy (mutation rate 0.001).
                if random.random() < 0.01:
                    myItem.seq = Seq(
                        mutate(sequence=myItem.seq, mutation_rate=0.001))
                # Append 150 'A' characters before slicing the sub-sequence.
                myItem += "A" * 150
                start = 0
                #print(myItem.id + " length: " + str(limit))
                # myLength is defined outside this excerpt.
                end = start + myLength
                mySub = myItem[start:end]
Esempio n. 19
0
    def __init__(self, **config):
        """Set up the simulation state from keyword configuration.

        Required keys: ``num_taxis``, ``request_rate``, ``log`` and
        ``show_plot`` (plus ``show_map_labels`` and ``show_pending`` when
        ``show_plot`` is truthy). The whole ``config`` is also forwarded to
        ``City``.

        Optional keys and their defaults: ``price_fixed`` (0),
        ``price_per_dist`` (1), ``cost_per_unit`` (0), ``cost_per_time`` (0),
        ``max_request_waiting_time`` (10000), ``behaviour`` ("go_back"),
        ``initial_conditions`` ("base"), ``reset_time`` (``max_time + 1``).

        ``matching``, ``batch_size`` and ``max_time`` become attributes only
        when they are supplied.

        NOTE(review): ``self.max_time`` is read unconditionally when sizing
        ``city.length`` (and for the default ``reset_time``), so omitting
        ``max_time`` raises AttributeError -- confirm it is meant to be
        required.
        """
        # simulation clock
        self.time = 0

        self.num_taxis = config["num_taxis"]
        self.request_rate = config["request_rate"]

        # price that is used for every trip
        self.price_fixed = config.get("price_fixed", 0)
        # price per unit distance while carrying a passenger
        self.price_per_dist = config.get("price_per_dist", 1)
        # cost per unit distance (e.g. gas)
        self.cost_per_unit = config.get("cost_per_unit", 0)
        # cost per time (e.g. amortization)
        self.cost_per_time = config.get("cost_per_time", 0)

        # these attributes exist only when explicitly configured
        for optional_key in ("matching", "batch_size", "max_time"):
            if optional_key in config:
                setattr(self, optional_key, config[optional_key])

        # this is done now by the config generator in theory
        self.max_request_waiting_time = config.get(
            "max_request_waiting_time", 10000)

        # number of batches, known only when both inputs were configured
        has_batching = ("batch_size" in config) and ("max_time" in config)
        self.num_iter = (int(np.ceil(self.max_time / self.batch_size))
                         if has_batching else None)

        # goback / stay / cruise
        self.behaviour = config.get("behaviour", "go_back")
        # base / home
        self.initial_conditions = config.get("initial_conditions", "base")

        # the default is evaluated lazily: it needs self.max_time
        self.reset_time = (config["reset_time"] if "reset_time" in config
                           else self.max_time + 1)

        # id counters
        self.latest_taxi_id = 0
        self.latest_request_id = 0

        # taxi bookkeeping
        self.taxis = RandomDict()
        self.taxis_available = RandomDict()
        self.taxis_to_request = set()
        self.taxis_to_destination = set()

        # request bookkeeping
        self.requests = RandomDict()
        self.requests_pending = set()

        # speeding up going through requests in the order of waiting times:
        # they are pushed into a deque in the order of timestamps
        self.requests_pending_deque = deque()
        self.requests_pending_deque_batch = deque(
            maxlen=self.max_request_waiting_time)
        self.requests_pending_deque_temporary = deque()
        self.requests_in_progress = set()

        # city layout
        self.city = City(**config)
        # length of pregenerated random number storage
        self.city.length = int(min(self.max_time * self.request_rate, 1e6))

        # whether to log all movements for debugging purposes
        self.log = config["log"]
        self.city.log = self.log
        # showing map of moving taxis in interactive jupyter notebook
        self.show_plot = config["show_plot"]

        # populate the simulation with taxis
        for _ in range(self.num_taxis):
            self.add_taxi()

        # one DataFrame row per taxi, indexed by taxi id
        self.taxi_df = pd.DataFrame.from_dict(
            [dict(taxi) for _, taxi in self.taxis.items()])
        self.taxi_df.set_index('taxi_id', inplace=True)

        if self.show_plot:
            # plotting variables
            self.canvas = plt.figure()
            self.canvas_ax = self.canvas.add_subplot(1, 1, 1)
            self.canvas_ax.set_aspect('equal', 'box')
            self.cmap = plt.get_cmap('viridis')
            # one colour value per taxi, assigned in random order
            self.taxi_colors = list(np.linspace(0, 0.85, self.num_taxis))
            shuffle(self.taxi_colors)
            self.show_map_labels = config["show_map_labels"]
            self.show_pending = config["show_pending"]
            self.init_canvas()
Esempio n. 20
0
 def __init__(self, size, time_out):
     """Store the configuration and create the empty containers.

     :param size: stored unchanged as ``self.size``.
     :param time_out: stored unchanged as ``self.time_out``.
     """
     self.size, self.time_out = size, time_out
     self.ver = '0.1'
     # both containers start out empty
     self.clock = dict()
     self.slot = RandomDict()
Esempio n. 21
0
def test_delete_missing():
    """Delete a key that was never stored.

    No assertion is made here; presumably an expected-exception wrapper that
    is not visible in this snippet checks the outcome -- confirm.
    """
    initial_items = {'a': 1}
    subject = RandomDict(initial_items)
    del subject['b']
Esempio n. 22
0
def test_len():
    """A dict built from one item reports length 1."""
    initial_items = {'a': 1}
    subject = RandomDict(initial_items)
    assert len(subject) == 1
Esempio n. 23
0
def test_empty_random_item():
    """Call ``random_item`` on an empty dict.

    No assertion is made here; presumably an expected-exception wrapper that
    is not visible in this snippet checks the outcome -- confirm.
    """
    empty = RandomDict()
    empty.random_item()
Esempio n. 24
0
def test_init_with_update():
    """Items passed to the constructor are present afterwards."""
    initial_items = {'a': 1}
    subject = RandomDict(initial_items)
    assert 'a' in subject
Esempio n. 25
0
from anytree import Node, RenderTree
from anytree.importer import DictImporter
import treeTester
from randomdict import RandomDict
import io
import time

importer = DictImporter()

# Results file, opened for appending; it is not closed within this excerpt.
fNaive = io.open("./runs/TreeTest2.csv", 'a')

# Read the stored trees and release the file handle right away.
with io.open("./DictStore.pkl", 'r') as treeStore:
    fTrees = treeStore.read().splitlines()

singleRunResults = RandomDict()

key = "DEFAULT, SHOULD NEVER BE SEEN"

# Lines starting with '.' open a new group; every other line is a tree
# dictionary that belongs to the most recently opened group.
for line in fTrees:
    if not line:
        # a blank line carries no data, and line[0] would raise IndexError
        continue
    if line[0] == '.':
        key = line
        singleRunResults[key] = []
    else:
        # SECURITY NOTE(review): eval executes arbitrary code from the file.
        # If every line is a plain literal, ast.literal_eval is the safe
        # replacement -- confirm the file format before switching.
        tDict = eval(line)
        root = importer.import_(tDict)
        singleRunResults[key].append(root)

done = 0

# NOTE(review): `keys` is read as an attribute, not called; this relies on
# RandomDict exposing its key index as an iterable attribute -- confirm
# against the installed randomdict version.
for x in singleRunResults.keys:
    for y in range(0, 10):
        t = time.time()
Esempio n. 26
0
def test_empty_random_key():
    """Call ``random_key`` on an empty dict.

    No assertion is made here; presumably an expected-exception wrapper that
    is not visible in this snippet checks the outcome -- confirm.
    """
    empty = RandomDict()
    empty.random_key()
Esempio n. 27
0
def test_empty_random_value():
    """Call ``random_value`` on an empty dict.

    No assertion is made here; presumably an expected-exception wrapper that
    is not visible in this snippet checks the outcome -- confirm.
    """
    empty = RandomDict()
    empty.random_value()
Esempio n. 28
0
 def __init__(self):
     """Create the instance with an empty word store in ``words_dict``."""
     empty_store: RandomDict[str, Word] = RandomDict()
     self.words_dict = empty_store