def createReadLibrary(df, reads_file_1, reads_file_2, args):
    '''Simulate paired NGS reads for every row of ``df``.

    ``df`` supplies, per row, a reference sequence (``seq``), an alternate
    sequence (``alt_seq``) and the number of read pairs to generate for each
    (``ref_read_count`` / ``alt_read_count``). For every requested pair
    ``createReadPair`` is called with ``reads_file_1`` and ``reads_file_2``,
    and the running read counter it returns is threaded into the next call.

    Side effects on ``args``: ``read_target``, ``insert_distribution``,
    ``insert_distribution_min`` and ``qual_scores`` are (re)assigned here
    before any read pair is created.

    Nothing is returned.
    '''
    read_counter = 0
    args.read_target = np.sum(df.ref_read_count) + np.sum(df.alt_read_count)

    # Draw more insert sizes than strictly needed: the extra 10000 samples
    # are a buffer in case of rounding errors etc.
    sampled_inserts = stats.skewnorm.rvs(
        a=args.skew,
        size=args.read_target + 10000,
        loc=args.insert_mean,
        scale=args.insert_sd,
    )
    insert_sizes = [int(sample) for sample in sampled_inserts]
    random.shuffle(insert_sizes)

    args.insert_distribution = RandomDict(
        [(index, size) for index, size in enumerate(insert_sizes)])
    args.insert_distribution_min = min(insert_sizes)
    args.qual_scores = RandomDict(
        [(index, scores) for index, scores in enumerate(readQualscores(args))])

    # One pass per gene/transcript: reference reads first, then alternate.
    for row in df.itertuples():
        for _ in range(row.ref_read_count):
            read_counter = createReadPair(
                row.seq, reads_file_1, reads_file_2, read_counter, args)
        for _ in range(row.alt_read_count):
            read_counter = createReadPair(
                row.alt_seq, reads_file_1, reads_file_2, read_counter, args)
def _parse_initial_node_table(initial_node_table: bfcp_pb2.NodeTable)\
        -> Dict[bytes, bfcp_pb2.NodeTableEntry]:
    """Index the entries of ``initial_node_table`` by their node's public key.

    Each entry's ``node.public_key`` is converted with ``proto_to_pubkey`` and
    then ``pubkey_to_deterministic_string``; the result is the key under which
    the entry is stored. The mapping returned is a ``RandomDict``.
    """
    table = RandomDict()
    for entry in initial_node_table.entries:
        key = pubkey_to_deterministic_string(
            proto_to_pubkey(entry.node.public_key))
        table[key] = entry
    return table
def test_many_inserts_deletes():
    """Insert 10000 integer keys, delete each one, and expect length 0."""
    total = 10000
    r = RandomDict()
    for key in range(total):
        r[key] = 1
    for key in range(total):
        del r[key]
    assert len(r) == 0
def test_random_value():
    """Every value returned by random_value() must be one that was stored."""
    total = 10000
    r = RandomDict()
    for number in range(total):
        r[number] = number

    values = set(range(total))
    for _ in range(100000):
        assert r.random_value() in values
def init_choice_set(image_point):
    """Collect every index-ordered pair of image ids from ``image_point``.

    Each element of ``image_point`` is unpacked as a 2-item pair whose first
    item is used as the image id. For all positions ``i < j`` the tuple
    ``(id_i, id_j)`` is stored in a RandomDict, keyed by the dict's length at
    the moment of insertion.
    """
    pairs = RandomDict()
    count = len(image_point)
    for first in range(count):
        for second in range(first + 1, count):
            left_id, _ = image_point[first]
            right_id, _ = image_point[second]
            pairs[len(pairs)] = (left_id, right_id)
    return pairs
def test_update(self):
    """update() must add new keys and overwrite the values of existing ones."""
    t = RandomDict({'a': 1, 'b': 2})
    self.assertEqual(2, len(t))
    for key, expected in (('a', 1), ('b', 2)):
        self.assertEqual(expected, t[key])

    t.update({'c': 7, 'a': 8})
    self.assertEqual(3, len(t))
    for key, expected in (('a', 8), ('b', 2), ('c', 7)):
        self.assertEqual(expected, t[key])
def test_del(self):
    """Removing keys with ``del`` and ``pop`` shrinks the dict as expected."""
    t = RandomDict()
    for key, value in (('a', 1), ('b', 2)):
        t[key] = value

    del t['a']
    self.assertEqual(2, t['b'])
    self.assertEqual(1, len(t))

    popped = t.pop('b')
    self.assertEqual(2, popped)
    self.assertEqual(0, len(t))
def test_random_key():
    """Drain the dict via random_key(); each key drawn must be a known one."""
    import string

    letters = string.ascii_lowercase
    r = RandomDict()
    for letter in letters:
        r[letter] = 1

    keyset = set(letters)
    while len(r) > 0:
        picked = r.random_key()
        assert picked in keyset
        del r[picked]
def test_set_get(self):
    """Item assignment, lookup, get() and setdefault() behave like a dict."""
    t = RandomDict()
    for key, value in (('a', 1), ('b', 2)):
        t[key] = value
    for key, expected in (('a', 1), ('b', 2)):
        self.assertEqual(expected, t[key])

    # Overwriting an existing key must not affect the other entry or the size.
    t['a'] = 10
    for key, expected in (('a', 10), ('b', 2)):
        self.assertEqual(expected, t[key])
    self.assertEqual(2, len(t))

    self.assertEqual(2, t.get('b'))
    t.setdefault('c', 89)
    self.assertEqual(89, t['c'])
def __init__(self, definitions, use_string_pattern, init_rand_values,
             max_string_len=200):
    """Create a Replicator for a given type.

    :parameter definitions all object definitions that could be referenced
                           by the type that is to be replicated
    :parameter use_string_pattern if valid, patterns are generated instead
                           of random character strings
    :parameter init_rand_values whether replicated values should be random
                           or - as per default - be 0 or ''
    :parameter max_string_len maximum length of generated strings
    """
    # Sources of randomness owned by this instance.
    self.random = Random()
    self.randomdict = RandomDict()

    # Configuration, stored verbatim from the arguments.
    self.definitions = definitions
    self.use_string_pattern = use_string_pattern
    self.init_rand_values = init_rand_values
    self.max_string_len = max_string_len
def initializePatches(self):
    """Create one Patch per grid cell and register every cell as empty.

    Fills ``self.patch_dict`` so that ``self.patch_dict[i][j]`` is the Patch
    at row ``i``, column ``j`` (for ``i`` in ``range(self.rows)`` and ``j`` in
    ``range(self.cols)``). A cell holds "sugar" when ``i + j >= self.rows``
    and "water" otherwise. Afterwards ``self.empty_patches`` is a RandomDict
    mapping ``(i, j)`` to the same Patch objects.
    """
    # One empty dict per row. The earlier placeholder comprehension
    # ({i: {j: 0} ...} over both ranges) created rows*cols throwaway dicts
    # and kept only {cols-1: 0} per row, which left the last column first in
    # each row's iteration order. Rows are now filled in column order.
    self.patch_dict = {i: {} for i in range(self.rows)}

    for i in range(self.rows):
        for j in range(self.cols):
            good = "sugar" if i + j >= self.rows else "water"
            self.patch_dict[i][j] = Patch(self, i, j, self.sugarMap[i][j], good)

    self.empty_patches = RandomDict({
        (i, j): self.patch_dict[i][j]
        for i in range(self.rows)
        for j in range(self.cols)
    })
def initializePatches(self):
    """Create one Patch per grid cell and register every cell as empty.

    Fills ``self.patch_dict`` so that ``self.patch_dict[row][col]`` is the
    Patch at that grid position (for ``row`` in ``range(self.rows)`` and
    ``col`` in ``range(self.cols)``). A cell holds "sugar" when
    ``row + col < self.cols`` and "water" otherwise. Afterwards
    ``self.empty_patches`` is a RandomDict mapping ``(row, col)`` to the same
    Patch objects.
    """
    # One empty dict per row. The earlier placeholder comprehension
    # ({row: {col: 0} ...} over both ranges) created rows*cols throwaway
    # dicts and kept only {cols-1: 0} per row, which left the last column
    # first in each row's iteration order. Rows are now filled in column order.
    self.patch_dict = {row: {} for row in range(self.rows)}

    for row in range(self.rows):
        for col in range(self.cols):
            good = "sugar" if row + col < self.cols else "water"
            self.patch_dict[row][col] = Patch(
                self, row, col, self.sugarMap[row][col], good)

    # RandomDict is used so that a random empty patch can be chosen cheaply.
    # NOTE(review): the previous comment said "O(n)"; the randomdict package
    # advertises constant-time random access - confirm against the version
    # in use.
    self.empty_patches = RandomDict({
        (row, col): self.patch_dict[row][col]
        for row in range(self.rows)
        for col in range(self.cols)
    })
def parse_jeopardy(self):
    """Load ``data/trivia/jeopardy.csv`` into a RandomDict of TriviaLine.

    Every non-empty CSV row becomes one ``TriviaLine``: all columns except
    the last are joined with newlines (then stripped) to form the question,
    and the last column is the single answer. Entries are keyed by
    consecutive integers starting at 0.

    Returns:
        The populated RandomDict.

    Raises:
        ValueError: if no entry was parsed.
    """
    path = "data/trivia/jeopardy.csv"
    parsed_list = RandomDict()
    print("Special routine for jeopardy parsing")
    encoding = "ISO-8859-1"
    print("encoding detected")
    # newline="" is what the csv module requires of its input file so that
    # line endings inside quoted fields are handled by the reader itself.
    with open(path, "r", encoding=encoding, newline="") as f:
        reader = csv.reader(f)
        counter = 0
        print("csv loop started")
        for row in reader:
            if not row:
                # A blank line is reported as []; row[-1] would raise
                # IndexError, so skip it and keep the keys contiguous.
                continue
            parsed_list[counter] = TriviaLine(
                question="\n".join(row[:-1]).strip(),
                answers=[row[-1]])
            counter += 1
    if not parsed_list:
        raise ValueError("Empty trivia list")
    print("done")
    return parsed_list
def parse_trivia_list(self, filename):
    """Parse ``data/trivia/<filename>.txt`` into a RandomDict of TriviaLine.

    The special name ``"jeopardy"`` is delegated to ``self.parse_jeopardy()``.
    Otherwise each line containing a backtick is split on backticks: the
    first field is the question, the remaining fields (stripped) are the
    answers. Lines without a backtick or with an empty question are ignored.
    Entries are keyed by consecutive integers starting at 0.

    The file encoding is guessed with ``chardet``; if detection raises or
    reports no encoding, ISO-8859-1 is used.

    Returns:
        The populated RandomDict.

    Raises:
        ValueError: if no entry was parsed.
    """
    if filename == "jeopardy":
        return self.parse_jeopardy()
    print("parsing ordinary trivia txt")
    path = "data/trivia/{}.txt".format(filename)
    parsed_list = RandomDict()
    fallback_encoding = "ISO-8859-1"

    with open(path, "rb") as f:
        try:
            # chardet may report None when it cannot decide; use the same
            # fallback as for a failed detection instead of the locale default.
            encoding = chardet.detect(f.read())["encoding"] or fallback_encoding
        except Exception:
            # Best-effort detection, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` did.
            encoding = fallback_encoding

    with open(path, "r", encoding=encoding) as f:
        counter = 0
        for raw_line in f:
            if "`" not in raw_line:
                continue
            fields = raw_line.replace("\n", "").split("`")
            question = fields[0]
            answers = [answer.strip() for answer in fields[1:]]
            if len(fields) >= 2 and question and answers:
                parsed_list[counter] = TriviaLine(
                    question=question, answers=answers)
                counter += 1

    if not parsed_list:
        raise ValueError("Empty trivia list")
    return parsed_list
def genquery(genomeFile, jellyFile, totedits, medindel, insprob, delprob,
             queryfreq, querycount, outputFile):
    """Write a shuffled stream of random edits and k-mer queries.

    Parameters:
        genomeFile:  open file object; its first line is read as the genome
                     and the file is then closed.
        jellyFile:   argument passed to ``jellyfish.QueryMerFile``.
        totedits:    total number of edits to make.
        medindel:    median (mean) size of indel edits; forwarded to
                     ``random_insertion`` / ``random_deletion``.
        insprob:     probability of an insertion.
        delprob:     probability of a deletion (the remainder are SNPs).
        queryfreq:   number of edits between two batches of queries.
        querycount:  number of queries written per batch.
        outputFile:  writable file object; receives one line per edit
                     (``I``/``D``/``S``) and per query (``Q``), and is closed
                     at the end.

    Raises:
        ValueError: if ``delprob + insprob`` exceeds 1.0.
    """
    if delprob + insprob > 1.0:
        # Raising a bare string is itself a TypeError; raise a real exception
        # so the message actually reaches the caller.
        raise ValueError(
            "Error, delprob = {} and insprob = {}. "
            "The sum is {} > 1.0".format(
                delprob, insprob, delprob + insprob))

    genome = genomeFile.readline()
    genomeFile.close()
    # Not queried below (the count lookup is disabled), but still constructed
    # as before. NOTE(review): presumably this also validates jellyFile -
    # confirm before removing.
    qf = jellyfish.QueryMerFile(jellyFile)
    # Drop the last character of the line that was read.
    numbases = len(genome) - 1
    genome = genome[0:numbases]

    snpProb = 1.0 - (insprob + delprob)
    SNPrange = int(snpProb * totedits)
    insrange = int(insprob * totedits)
    delrange = int(delprob * totedits)
    editTypes = (['S'] * SNPrange) +\
                (['D'] * delrange) +\
                (['I'] * insrange)
    random.shuffle(editTypes)

    qcount = 0
    effectedkmers = RandomDict()
    # Only every ``sample``-th edit (by position within the current query
    # window) contributes k-mers to ``effectedkmers``.
    sample = 10
    for val in editTypes:
        qcount += 1
        if val == 'I':
            p, s, seq = random_insertion(numbases, medindel)
            numbases += s
            outputFile.write('I %d %s\n' % (p, seq))
            if ((qcount - 1) % sample) == 0:
                add_kmers_in_seq(effectedkmers, seq)
                add_kmers_in_seq(effectedkmers, genome[p - K + 1:p + K])
        elif val == 'D':
            p, s = random_deletion(numbases, medindel)
            numbases -= s
            outputFile.write('D %d %d\n' % (p, p + s - 1))
        else:
            p, seq = random_snp(numbases)
            outputFile.write('S %d %s\n' % (p, seq))
            if ((qcount - 1) % sample) == 0:
                add_kmers_in_seq(effectedkmers, genome[p - K + 1:p + K - 1])

        # if it's time to output some queries
        if qcount == queryfreq:
            qcount = 0
            for _ in xrange(querycount):
                dart = random.random()
                if dart <= EDIT_QUERY_PROB:
                    kmer = effectedkmers.random_key()
                    editflag = 'I'
                else:
                    # NOTE(review): ``genome`` is never updated by the edits
                    # while ``numbases`` is, so ``p`` can point past the end
                    # of ``genome`` and yield a short k-mer - confirm intent.
                    p = random.randrange(K * 2, numbases - K * 2)
                    kmer = genome[p:p + K].upper()
                    editflag = 'N'
                # The jellyfish count lookup is disabled; 0 is always written.
                kcount = 0
                outputFile.write('Q %s %s %d\n' % (kmer, editflag, kcount))
    outputFile.close()
grid_col = current_col grid_id = current_grid if os.path.exists(os.path.join(output_folder, 'patches%04d.bmp' % grid_id)): grid = cv2.imread(os.path.join(output_folder, 'patches%04d.bmp' % grid_id), cv2.IMREAD_GRAYSCALE) else: grid = np.zeros(shape=(grid_sz * input_sz, grid_sz * input_sz), dtype=np.uint8) info_file = open(os.path.join(output_folder, 'info.txt'), 'a+') #position_file = open(os.path.join(output_folder, 'position%04d.txt'%grid_id), 'w') # matches = RandomDict() possible_n_matches = 0 nonmatches = {} def init_choice_set(image_point): choices = RandomDict() n = len(image_point) for i in range(n): for j in range(i + 1, n): image_id1, _ = image_point[i] image_id2, _ = image_point[j] choices[len(choices)] = (image_id1, image_id2) return choices
def test_delete():
    """Deleting the only key leaves an empty dict."""
    single = RandomDict({'a': 1})
    del single['a']
    assert len(single) == 0
for cell_barcode in cell_pool: whitelist_file.write('{}\n'.format(cell_barcode)) #Generate some mutants mutants = generate_mutants(cell_pool=cell_pool, base_mutation_rate=0.001, number_of_mutants=10) print('Writing sim data to file.') count = 1 with open(args.Outputname + "_R1.fastq", "w") as handleR1, open(args.Outputname + "_R2.fastq", "w") as handleR2: for myfile in args.T: record_dict = SeqIO.index(myfile.name, "fasta") record_rand = RandomDict(record_dict) for cell in range(0, nrCells): cellSeq = random.sample(cell_pool, 1)[0] cell_pool.discard(cellSeq) umi_set = generate_umi_set(args.umi_bc_length, 100, 0.2) for record in range(0, nrSeqs): UMIseq = random.sample(umi_set, 1)[0] myItem = record_rand.random_value() if random.random() < 0.01: myItem.seq = Seq( mutate(sequence=myItem.seq, mutation_rate=0.001)) myItem += "A" * 150 start = 0 #print(myItem.id + " length: " + str(limit)) end = start + myLength mySub = myItem[start:end]
def __init__(self, **config):
    """Build the simulation state from keyword configuration.

    Required keys: ``num_taxis``, ``request_rate``, ``log``, ``show_plot``
    (plus ``show_map_labels`` and ``show_pending`` when ``show_plot`` is
    truthy). Optional keys with defaults: ``price_fixed`` (0),
    ``price_per_dist`` (1), ``cost_per_unit`` (0), ``cost_per_time`` (0),
    ``max_request_waiting_time`` (10000), ``behaviour`` ("go_back"),
    ``initial_conditions`` ("base"), ``reset_time`` (``max_time + 1``).
    ``matching``, ``batch_size`` and ``max_time`` are only set as attributes
    when present in ``config``; ``self.max_time`` is nevertheless read below,
    so omitting ``max_time`` fails with AttributeError.

    The whole ``config`` is also forwarded to ``City``.
    """
    # simulation clock
    self.time = 0

    self.num_taxis = config["num_taxis"]
    self.request_rate = config["request_rate"]

    # pricing: flat fee per trip and fee per unit distance with a passenger
    self.price_fixed = config.get("price_fixed", 0)
    self.price_per_dist = config.get("price_per_dist", 1)
    # costs: per unit distance (e.g. gas) and per time (e.g. amortization)
    self.cost_per_unit = config.get("cost_per_unit", 0)
    self.cost_per_time = config.get("cost_per_time", 0)

    # these attributes exist only if they were configured
    for optional_key in ("matching", "batch_size", "max_time"):
        if optional_key in config:
            setattr(self, optional_key, config[optional_key])

    # this is done now by the config generator in theory
    self.max_request_waiting_time = config.get(
        "max_request_waiting_time", 10000)

    if ("batch_size" in config) and ("max_time" in config):
        self.num_iter = int(np.ceil(self.max_time / self.batch_size))
    else:
        self.num_iter = None

    # goback / stay / cruise
    self.behaviour = config.get("behaviour", "go_back")
    # base / home
    self.initial_conditions = config.get("initial_conditions", "base")

    if "reset_time" in config:
        self.reset_time = config["reset_time"]
    else:
        self.reset_time = self.max_time + 1

    # id counters
    self.latest_taxi_id = 0
    self.latest_request_id = 0

    # object storage
    self.taxis = RandomDict()
    self.taxis_available = RandomDict()
    self.taxis_to_request = set()
    self.taxis_to_destination = set()

    self.requests = RandomDict()
    self.requests_pending = set()

    # speeding up going through requests in the order of waiting times:
    # they are pushed into a deque in the order of timestamps
    self.requests_pending_deque = deque()
    self.requests_pending_deque_batch = deque(
        maxlen=self.max_request_waiting_time)
    self.requests_pending_deque_temporary = deque()
    self.requests_in_progress = set()

    # city layout
    self.city = City(**config)
    # length of pregenerated random number storage
    self.city.length = int(min(self.max_time * self.request_rate, 1e6))

    # whether to log all movements for debugging purposes
    self.log = config["log"]
    self.city.log = self.log

    # showing map of moving taxis in interactive jupyter notebook
    self.show_plot = config["show_plot"]

    # populate the simulation with taxis
    for _ in range(self.num_taxis):
        self.add_taxi()

    self.taxi_df = pd.DataFrame.from_dict(
        [dict(taxi) for _, taxi in self.taxis.items()])
    self.taxi_df.set_index('taxi_id', inplace=True)

    if self.show_plot:
        # plotting variables
        self.canvas = plt.figure()
        self.canvas_ax = self.canvas.add_subplot(1, 1, 1)
        self.canvas_ax.set_aspect('equal', 'box')
        self.cmap = plt.get_cmap('viridis')
        self.taxi_colors = list(np.linspace(0, 0.85, self.num_taxis))
        shuffle(self.taxi_colors)
        self.show_map_labels = config["show_map_labels"]
        self.show_pending = config["show_pending"]
        self.init_canvas()
def __init__(self, size, time_out):
    """Store ``size`` and ``time_out`` and start with empty bookkeeping.

    ``slot`` begins as an empty RandomDict, ``clock`` as an empty dict, and
    ``ver`` is the fixed string '0.1'.
    """
    self.ver = '0.1'
    self.size, self.time_out = size, time_out
    self.slot = RandomDict()
    self.clock = {}
def test_delete_missing():
    """Delete a key that was never inserted.

    NOTE(review): no assertion is visible here; the expected outcome is
    presumably declared outside this function (e.g. by a decorator) - confirm.
    """
    single = RandomDict({'a': 1})
    del single['b']
def test_len():
    """A dict built from one item reports length 1."""
    single = RandomDict({'a': 1})
    assert len(single) == 1
def test_empty_random_item():
    """Call random_item() on an empty dict.

    NOTE(review): no assertion is visible here; the expected outcome is
    presumably declared outside this function (e.g. by a decorator) - confirm.
    """
    empty = RandomDict()
    empty.random_item()
def test_init_with_update():
    """A mapping passed to the constructor ends up in the dict."""
    seeded = RandomDict({'a': 1})
    assert 'a' in seeded
from anytree import Node, RenderTree from anytree.importer import DictImporter import treeTester from randomdict import RandomDict import io import time importer = DictImporter() fNaive = io.open("./runs/TreeTest2.csv", 'a') fTrees = io.open("./DictStore.pkl", 'r').read().splitlines() singleRunResults = RandomDict() key = "DEFAULT, SHOULD NEVER BE SEEN" for line in fTrees: if line[0] == '.': key = line singleRunResults[key] = [] else: tDict = eval(line) root = importer.import_(tDict) singleRunResults[key].append(root) done = 0 for x in singleRunResults.keys: for y in range(0, 10): t = time.time()
def test_empty_random_key():
    """Call random_key() on an empty dict.

    NOTE(review): no assertion is visible here; the expected outcome is
    presumably declared outside this function (e.g. by a decorator) - confirm.
    """
    empty = RandomDict()
    empty.random_key()
def test_empty_random_value():
    """Call random_value() on an empty dict.

    NOTE(review): no assertion is visible here; the expected outcome is
    presumably declared outside this function (e.g. by a decorator) - confirm.
    """
    empty = RandomDict()
    empty.random_value()
def __init__(self):
    """Initialise the instance with an empty ``words_dict``.

    ``words_dict`` is a RandomDict annotated as mapping ``str`` to ``Word``.
    """
    self.words_dict: RandomDict[str, Word] = RandomDict()