def __init__(self, storage_path):
    self.path = storage_path
    self.storage = ZODB.FileStorage.FileStorage(self.path)
    self.db = ZODB.DB(self.storage)
    self.connection = self.db.open()
    self.data_root = self.connection.root
    # Create the top-level containers the first time the database is opened.
    root = self.data_root
    if not root.__dict__['_root'].get('Genome'):
        root.Genome = BTree()
    if not root.__dict__['_root'].get('Sequence'):
        root.Sequence = BTree()
    if not root.__dict__['_root'].get('Source'):
        root.Source = BTree()
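# A minimal usage sketch for the constructor above, assuming it belongs to a
# small store class (called GenomeStore here purely for illustration) and that
# ZODB, BTrees and transaction are available.
import transaction

store = GenomeStore('genomes.fs')                     # hypothetical class name and path
store.data_root.Genome['example-id'] = {'organism': 'example'}  # placeholder record
transaction.commit()
store.db.close()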
def __init__(self, conn):
    conn.add(self)
    assert self._p_oid == ONE
    self.users = BTree()          # {uid -> user}
    self.users_by_name = BTree()  # {uname -> user}
    self.uids = BTree()           # {cert_der -> uid}
    self.certs = Certs()          # Cert, persistent wrapper for
                                  # concatenated cert data
    # Add nobody placeholder
    with open(os.path.join(os.path.dirname(__file__), 'nobody.pem')) as f:
        nobody_pem = f.read()
    self.certs.add(nobody_pem)
    self.uids[get_der(nobody_pem)] = None
def modifier(inq, outq):
    while True:
        item = inq.get()
        if item is None:
            break
        else:
            sequence_number = item[0]
            sequence = item[1].strip().lower()
            sequence = ''.join([i for i in sequence if i in allowed])
            local_features = BTree()
            for i in range(len(sequence) - k + 1):
                kmer = sequence[i:i + k]
                feature_index = features[kmer]
                index = feature_index
                try:
                    current_count = local_features[index]
                    local_features[index] = current_count + 1
                except KeyError:
                    local_features[index] = 1
            # normalization
            sum = 0
            for value in local_features.values():
                sum = sum + (value * value)
            sum = math.sqrt(sum)
            for key in local_features.keys():
                value = local_features[key]
                local_features[key] = float(value) / sum
            out_string = str(sequence_number)
            for key, val in local_features.iteritems():
                out_string = out_string + ' ' + str(key) + '-' + str(val)
            out_string = out_string + '\n'
            outq.put(out_string)
            del local_features
def positivity(address):
    ################# read canopy output #################
    positivity_scores = BTree()
    with open(address, 'r') as f:
        for line in f:
            canopy_members = line.strip().split()
            total = float(len(canopy_members))
            number_of_positive = 0
            for member in canopy_members:
                if membertag_to_class(member) == 1:
                    number_of_positive = number_of_positive + 1
            p_score = float(number_of_positive) / total
            for member in canopy_members:
                try:
                    score_list = positivity_scores[member]
                    score_list.append(p_score)
                    positivity_scores[member] = score_list
                except:
                    positivity_scores[member] = [p_score]
    ############### take average score ###################
    for key in positivity_scores.keys():
        score_list = positivity_scores[key]
        sum = float(0)
        for item in score_list:
            sum = sum + item
        final_score = sum / float(len(score_list))
        positivity_scores[key] = final_score
    ################ prepare output ####################
    output = ''
    for key, value in positivity_scores.items():
        output = output + key + ' ' + str(value) + '\n'
    with open(positivity_outputs, 'w') as f:
        f.write(output)
def get_students(conn):
    root = conn.root
    if 'students' not in conn.root():
        print("init zodb database...")
        students = BTree()        # the BTree can be used like a dict
        root.students = students  # hand the students container over to ZODB
    else:
        students = root.students
    return students
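# A hedged usage sketch for get_students(), assuming a ZODB file storage at
# 'data/data.fs' and a Student record class like the one defined further down;
# the id '20230001' is purely illustrative.
import ZODB
import transaction

conn = ZODB.connection('data/data.fs')
students = get_students(conn)
students['20230001'] = Student('20230001', 'Alice', 'CS-1', 92.5)
transaction.commit()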
def get_students(conn):
    root = conn.root
    if 'students' not in conn.root():
        print("lala")
        students = BTree()        # the BTree can be used like a dict
        root.students = students  # hand the students container over to ZODB
    else:
        print("haha")
        students = root.students
    return students
def __init__(self, resources: List = None, indexed=True):
    self.internal = DictObject()
    self.internal.resources = deque(resources or [])
    self.internal.indexed = indexed
    self.internal.indexes = defaultdict(BTree)
    if indexed:
        self.internal.indexes.update({
            k: BTree()
            for k, field in self.ravel.owner.Schema.fields.items()
            if isinstance(field, self.ravel.indexed_field_types)
        })
def test_write():
    with tempfile.NamedTemporaryFile() as temp:
        connection = make_storage(temp.name)
        root = connection.root
        root.nodes = BTree()
        esp = ESPNode("esp8266")
        root.nodes['first_esp'] = esp
        water_sensor = Node("water_sensor")
        root.nodes['water'] = water_sensor
        e = Edge(esp, water_sensor)
        print(esp.from_edges)
        # root.nodes['wtf'] = e
        # esp.from_edges.append(water_sensor)
        transaction.commit()
def test(regressor, scaler):
    test_samples = []
    with open(os.path.join(os.getcwd(), "test_samples.txt"), "r") as f:
        line = f.readline()
        test_samples = line.strip().split()
    for sample in test_samples:
        regressed_values = BTree()
        test_row = []
        test_col = []
        test_data = []
        path = os.path.join(test_data_path, sample)
        row_counter = 0
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if line != ' ' and line != '' and line != '\n':
                    items = line.split()
                    sequence_number = int(items[0].strip())
                    regressed_values[row_counter] = sequence_number
                    for i in items[1:]:
                        col_id, data_value = i.strip().split("-")
                        col_id = int(col_id)
                        data_value = float(data_value)
                        test_row.append(row_counter)
                        test_col.append(col_id)
                        test_data.append(data_value)
                    row_counter = row_counter + 1
        test_row = np.asarray(test_row)
        test_col = np.asarray(test_col)
        test_data = np.asarray(test_data)
        x_test = coo_matrix((test_data, (test_row, test_col)),
                            shape=(row_counter, total_cols))
        x_test = scaler.transform(x_test)
        y_test = regressor.predict(x_test)
        y_test[y_test < 0] = 0.0
        y_test[y_test > 1] = 1.0
        toBeSorted = []
        for r in range(0, len(y_test)):
            toBeSorted.append((regressed_values[r], y_test[r]))
        toBeSorted = sorted(toBeSorted, key=lambda x: x[1], reverse=True)
        gc.collect()
        out = os.path.join(regression_output_Path, sample)
        with open(out, 'w') as f:
            for value in toBeSorted:
                f.write(str(value[0]) + ' ' + str(value[1]) + '\n')
def train_set_positivity():
    data = BTree()
    with open("positivity_outputs.txt", "r") as f:
        for line in f:
            line = line.strip().split()
            sample = line[0].split("_")[0]
            seq_number = line[0].split("_")[1]
            score = line[1]
            try:
                curr = data[sample]
                curr.append((seq_number, score))
                data[sample] = curr
            except:
                data[sample] = [(seq_number, score)]
    for key, val in data.items():
        with open(os.path.join(train_set_positivity_scores, key), "w") as f:
            for item in val:
                f.write(item[0] + " " + item[1] + "\n")
def test_jsonifier(self):
    from zope.testing.loggingsupport import InstalledHandler
    handler = InstalledHandler('newt.db.jsonpickle')
    from ..jsonpickle import Jsonifier
    jsonifier = Jsonifier()
    p, tid = self.conn._storage.load(z64)
    class_name, ghost_pickle, state = jsonifier('0', p)
    self.assertEqual('persistent.mapping.PersistentMapping', class_name)
    self.assertEqual('{"data": {}}', state)
    self.assertTrue(
        p.startswith(ghost_pickle) and
        ghost_pickle[-1:] == b'.' and
        b'persistent.mapping' in ghost_pickle)

    # custom skip_class
    jsonifier2 = Jsonifier(skip_class=lambda c: 1)
    self.assertEqual((None, None, None), jsonifier2('0', p))

    # empty records are skipped:
    self.assertEqual((None, None, None), jsonifier('0', ''))

    # BTrees are skipped by default
    from BTrees.OOBTree import BTree
    self.root.x = BTree()
    self.conn.transaction_manager.commit()
    p, tid = self.conn._storage.load(self.root.x._p_oid)
    self.assertEqual((None, None, None), jsonifier('0', p))

    # errors are logged, and return Nones:
    self.assertEqual(handler.records, [])
    self.assertEqual((None, None, None), jsonifier('foo', b'badness'))
    self.assertEqual(
        [r.getMessage().replace("b'", "'") for r in handler.records],
        ["Failed pickle load, oid: 'foo', pickle starts: 'badness'"])
    handler.uninstall()
def __init__(self):
    self.contracts = BTree()
    self.instance_lists = BTree()
    self.last_block = 0
import math
####################################################################
import multiprocessing
from multiprocessing import Pool
from multiprocessing import Process
from multiprocessing import Queue
####################################################################
d = 1000
####################################################################
hyper_planes = BTree()
ft_count = 0
kmer_feature_file = os.path.join(os.getcwd(), "kmer_feat_index.txt")
with open(kmer_feature_file, 'r') as f:
    for line in f:
        line = line.strip()
        if line != '' and line != ' ' and line != '\n':
            line = line.split()[1]
            ft_count = ft_count + 1
            try:
                index = int(line)
                hyper_planes[index] = np.random.randn(d)
            except ValueError:
                print('\nkmer feature index not valid\n')
                sys.exit(1)
            if index != ft_count - 1:
def on_bind(self, resource_type: Type['Resource'], **kwargs):
    for k, field in resource_type.ravel.schema.fields.items():
        if field.scalar and (type(field) is not Field):
            self.indexes[k] = BTree()
def main():
    hash_file = os.path.join("combined_train_lsh.txt")
    canopy_output_file = os.path.join("canopy_output.txt")
    hashed_sequence = BTree()
    current_dataset = []
    dataCounter = 0
    with open(hash_file, 'r') as f:
        for line in f:
            line = line.strip().split()
            try:
                seq_id = line[0]
                hash_value = (int)(line[1])
                current_dataset.append(seq_id)
                dataCounter = dataCounter + 1
                hashed_sequence[seq_id] = hash_value
            except ValueError:
                print("\nhash value invalid\n")
                sys.exit(1)
    print("total # of instances in training set: " + str(dataCounter))
    gc.collect()
    time.sleep(2)
    canopy_counter = 0
    with open(canopy_output_file, 'w') as f:
        while dataCounter > 0:
            canopyIndex = random.choice(current_dataset)
            canopyCenterHash = hashed_sequence[canopyIndex]
            current_dataset.remove(canopyIndex)
            dataCounter = dataCounter - 1
            remove_list = []
            ####################################################################
            num_workers = (int)(multiprocessing.cpu_count() / 2)
            workers = []
            inq = multiprocessing.Queue()
            outq = multiprocessing.Queue()
            deleteq = multiprocessing.Queue()
            st_curr_canopy_q = multiprocessing.Queue()
            current_dataset_q = multiprocessing.Queue()
            ####################################################################
            for i in range(num_workers):
                tmp = multiprocessing.Process(target=modifier, args=(
                    canopyCenterHash,
                    inq,
                    outq,
                    deleteq,
                ))
                tmp.daemon = True
                tmp.start()
                workers.append(tmp)
            ####################################################################
            fileWriteProcess = multiprocessing.Process(target=writer, args=(
                canopyIndex,
                outq,
                st_curr_canopy_q,
            ))
            fileWriteProcess.daemon = True
            fileWriteProcess.start()
            ####################################################################
            for seq in current_dataset:
                hv = hashed_sequence[seq]
                inq.put((seq, hv))
            ####################################################################
            datasetUpdater = multiprocessing.Process(target=data_updater, args=(
                current_dataset,
                deleteq,
                current_dataset_q,
            ))
            datasetUpdater.daemon = True
            datasetUpdater.start()
            ####################################################################
            for i in range(num_workers):
                inq.put(None)
            for worker in workers:
                worker.join()
            ####################################################################
            outq.put(None)
            deleteq.put(None)
            while True:
                try:
                    curr_canopy = st_curr_canopy_q.get()
                    break
                except:
                    pass
            f.write(curr_canopy)
            fileWriteProcess.join()
            while True:
                try:
                    c_s = current_dataset_q.get()
                    current_dataset = c_s
                    break
                except:
                    pass
            datasetUpdater.join()
            ####################################################################
            # with open("temp_file.txt", "r") as t:
            #     s = t.readline()
            #     f.write(s)
            ####################################################################
            inq.close()
            outq.close()
            # final_output_string.close()
            deleteq.close()
            del inq
            del outq
            # del final_output_string
            del deleteq
            del workers
            gc.collect()
            ####################################################################
            dataCounter = len(current_dataset)
            canopy_counter = canopy_counter + 1
            print(
                str(canopy_counter) + ". Remaining # of instances: " +
                str(dataCounter))
def __init__(self, invertedIndex):
    self._invertedIndex = invertedIndex
    self._btree = BTree()
    self.build()
    all_samples.extend(filenames)
    break
train_source_data = os.path.join(os.getcwd(), "train_set_positivity_scores")
train_samples = []
for (dirpath, dirnames, filenames) in walk(train_source_data):
    train_samples.extend(filenames)
    break
test_source_data = os.path.join(os.getcwd(), "regression_output")
test_samples = []
for (dirpath, dirnames, filenames) in walk(test_source_data):
    test_samples.extend(filenames)
    break
sampleClassLabels = BTree()
####################################################################
def classLabelCreator():
    global all_samples
    global sampleClassLabels
    global metadata
    for s in all_samples:
        sampleClassLabels[s] = 0
    with open(metadata, 'r') as mf:
        for line in mf:
            line = line.strip().split()
            if ("CD" in line[0]) or ("UC" in line[0]):
                for key in sampleClassLabels.keys():
def __init__(self):
    super().__init__()
    self._btree = BTree()
    self.build()
def _get_token_map(self):
    acl_users = self.getSite().acl_users.aq_base
    if not hasattr(acl_users, '_recover_password_tokens'):
        acl_users._recover_password_tokens = BTree()
    return acl_users._recover_password_tokens
import multiprocessing
from multiprocessing import Pool
from multiprocessing import Process
from multiprocessing import Queue
####################################################################
k = 6
####################################################################
kmer_index_file = os.path.join(os.getcwd(), 'kmer_feat_index.txt')
f = open(kmer_index_file, 'w')
allowed = ['a', 'c', 'g', 't']
features = BTree()
feature_count = -1
# Enumerate every possible k-mer over the DNA alphabet and assign each one a
# dense integer feature index, writing the mapping out for later steps.
for ft_string in map(''.join, itertools.product('acgt', repeat=k)):
    feature_count = feature_count + 1
    features[ft_string] = feature_count
    f.write(ft_string + ' ' + str(feature_count) + '\n')
f.close()
####################################################################
def reader(input_file, inq):
    sequence_number = 0
    f = open(input_file, 'r')
    sequence = ''
    for line in f:
        line = line.strip()
# In the code below, we track the number of requests made and the reset time.
# When we hit the limit, we pause until the reset time and then continue.

github = github3.login(github_login, github_password)
calls_left = api_calls_left(github)
msg('Started at ', datetime.now())
started = timer()

msg('Opening database "{}"'.format(dbfile))
dbconnection = ZODB.connection(dbfile)
dbroot = dbconnection.root()
if not 'github' in dbroot.keys():
    msg('Empty database -- creating root object')
    dbroot['github'] = BTree()
else:
    print('"{}" contains {} entries'.format(dbfile, len(dbroot['github'])))
db = dbroot['github']
msg('Initial GitHub API calls remaining: ', calls_left)
msg('Generating list of all repositories:')

# If we're restarting this process, we will already have entries in the db.
count = len(dbroot['github'])

# The iterator returned by github.all_repositories() is continuous; behind
# the scenes, it uses the GitHub API to get new data when needed. Each API
# call nets 100 repository records, so after we go through 100 objects in the
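# A minimal sketch of the pause-until-reset behaviour described above, using
# github3.py's rate_limit() endpoint. wait_for_reset() is a hypothetical
# helper added here for illustration; it is not part of the original script.
import time

def wait_for_reset(gh):
    # The core bucket carries 'remaining' and 'reset' (a Unix timestamp).
    core = gh.rate_limit()['resources']['core']
    if core['remaining'] == 0:
        # Sleep until the reset time, plus a small safety margin.
        time.sleep(max(0, core['reset'] - time.time()) + 1)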
import time
import math
####################################################################
import multiprocessing
from multiprocessing import Pool
from multiprocessing import Process
from multiprocessing import Queue
####################################################################
positivity_score_output = os.path.join(os.getcwd(), "positivity_outputs.txt")
nn_train_file = os.path.join(os.getcwd(), "nn_train_file.txt")
kmer_file = os.path.join(os.getcwd(), "metagenomic_data_kmer")
scores = BTree()

def read_scores():
    with open(positivity_score_output, "r") as f:
        for line in f:
            line = line.strip().split()
            sample_sequence = line[0]
            sample = sample_sequence.split("_")[0]
            sequence = (int)(sample_sequence.split("_")[1])
            score = (float)(line[1])
            try:
                current = scores[sample]
                current.append((sequence, score))
                scores[sample] = current
            except:
import sys
import itertools
import time
import math
####################################################################
import multiprocessing
from multiprocessing import Pool
from multiprocessing import Process
from multiprocessing import Queue
from Cython.Shadow import address
####################################################################
sampleClassLabels = BTree()
train_instances = BTree()
metadata = os.path.join(os.getcwd(), "metadata.txt")
kmer_data_path = os.path.join(os.getcwd(), "metagenomic_data_kmer")
canopy_output = os.path.join(os.getcwd(), "canopy_output.txt")
positivity_outputs = os.path.join(os.getcwd(), "positivity_outputs.txt")
train_set_positivity_scores = os.path.join(os.getcwd(), "train_set_positivity_scores")
samples = []
for (dirpath, dirnames, filenames) in walk(kmer_data_path):
    samples.extend(filenames)
    break
####################################################################
def classLabelCreator():
    global samples
def _prepareDataBase(objName, dbPosition):
    # type: (str, Persistent) -> None
    if objName not in dbPosition:
        dbPosition[objName] = BTree()
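# A hedged usage sketch for _prepareDataBase(), assuming an open ZODB
# connection and that the root mapping is passed as dbPosition; the container
# name 'Genome' and the key/value are illustrative only.
root = connection.root()
_prepareDataBase('Genome', root)
root['Genome']['some-key'] = 'some-value'  # hypothetical entry
transaction.commit()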
import transaction


class Student:
    def __init__(self, id, name, class_name, score):
        self.id = id
        self.name = name
        self.class_name = class_name
        self.score = score


csv_filename = 'data.csv'
db_filename = 'data/data.fs'
conn = ZODB.connection(db_filename)
students = BTree()
root = conn.root
root.students = students
with open(csv_filename, mode="r", encoding="GBK") as file:
    students.clear()
    reader = csv.reader(file)
    for row in reader:
        id = row[0]
        name = row[1]
        class_name = row[2]
        score = float(row[3])
        if id in students.keys():
            print(f"Load failed: student id {id} already exists!")
        student = Student(id, name, class_name, score)
def __init__(self):
    self.T = BTree()
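# Not part of the snippets above: a small sketch of the OOBTree behaviour the
# examples rely on. BTree() acts like a sorted, persistently storable dict.
from BTrees.OOBTree import BTree

t = BTree()
t['b'] = 2
t['a'] = 1
t['c'] = 3
list(t.keys())           # ['a', 'b', 'c'] -- keys stay sorted
list(t.items('a', 'b'))  # [('a', 1), ('b', 2)] -- inclusive range search
t.get('z', 0)            # 0 -- dict-style lookup with a default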