def load(filePath, loadEmbeddings=True):
    """Load the vocabulary (and optionally the vectors) of a binary W2V file.

    The file is read as: one header line holding two integers
    (embeddings count and embedding size), then, for every entry, the word
    terminated by a single space followed by ``embeddingSize`` raw float32
    values.

    Args:
        filePath: path of the binary embeddings file.
        loadEmbeddings: when true the vectors are read into a matrix; when
            false they are skipped with a relative seek and only the
            vocabulary is built.

    Returns:
        ``(wordIndexMap, embeddings)`` when ``loadEmbeddings`` is true,
        otherwise just ``wordIndexMap``. ``wordIndexMap`` maps each word to
        the row index it was assigned in reading order; ``embeddings`` is the
        ``np.zeros((embeddingsCount, embeddingSize))`` matrix filled row by
        row.

    Raises:
        ValueError: if the header line does not hold exactly two integers.
        UnicodeDecodeError: on Python 3, if a word is not valid UTF-8.
    """
    with open(filePath, 'rb') as w2vFile:
        # The file is opened in binary mode, so on Python 3 every read yields
        # bytes. split() without a separator and int() both accept bytes as
        # well as str, which keeps the header parsing working on 2 and 3.
        embeddingsCount, embeddingSize = (
            int(field) for field in w2vFile.readline().split())

        wordIndexMap = {}
        # Only pay for the (count x size) matrix when it will be returned.
        embeddings = None
        if loadEmbeddings:
            embeddings = np.zeros((embeddingsCount, embeddingSize))

        message = 'Loading W2V embeddings: %(percentage)i%%. %(value)i embeddings %(size)i features each.'
        with progress.start(message, embeddingsCount) as update:
            embeddingIndex = 0

            while True:
                # Words must be consumed byte by byte: the vector data starts
                # right after the separating space, so reading ahead would
                # misplace the file position used by fromfile()/seek() below.
                rawWord = b''
                while True:
                    char = w2vFile.read(1)

                    if not char:
                        # End of file: whatever was accumulated (typically a
                        # trailing newline) is not a complete entry.
                        if loadEmbeddings:
                            return wordIndexMap, embeddings
                        return wordIndexMap

                    if char == b' ':
                        break

                    rawWord += char

                # strip() drops the newline that follows the previous vector.
                word = rawWord.strip()
                if not isinstance(word, str):
                    # Python 3: turn bytes into text keys. On Python 2 bytes
                    # is str, so the keys stay exactly as they were.
                    word = word.decode('utf-8')

                index = len(wordIndexMap)
                wordIndexMap[word] = index

                if loadEmbeddings:
                    embeddings[index] = np.fromfile(
                        w2vFile, dtype='float32', count=embeddingSize)
                else:
                    # Skip the vector: embeddingSize float32 values, 4 bytes each.
                    w2vFile.seek(embeddingSize * 4, io.SEEK_CUR)

                embeddingIndex += 1
                update(embeddingIndex, size=embeddingSize)
def load(filePath, loadEmbeddings=True):
    """Load the vocabulary (and optionally the vectors) of a binary W2V file.

    The file is read as: one header line holding two integers
    (embeddings count and embedding size), then, for every entry, the word
    terminated by a single space followed by ``embeddingSize`` raw float32
    values.

    Args:
        filePath: path of the binary embeddings file.
        loadEmbeddings: when true the vectors are read into a matrix; when
            false they are skipped with a relative seek and only the
            vocabulary is built.

    Returns:
        ``(wordIndexMap, embeddings)`` when ``loadEmbeddings`` is true,
        otherwise just ``wordIndexMap``. ``wordIndexMap`` maps each word to
        the row index it was assigned in reading order; ``embeddings`` is the
        ``np.zeros((embeddingsCount, embeddingSize))`` matrix filled row by
        row.

    Raises:
        ValueError: if the header line does not hold exactly two integers.
        UnicodeDecodeError: on Python 3, if a word is not valid UTF-8.
    """
    with open(filePath, 'rb') as w2vFile:
        # The file is opened in binary mode, so on Python 3 every read yields
        # bytes. split() without a separator and int() both accept bytes as
        # well as str, which keeps the header parsing working on 2 and 3.
        embeddingsCount, embeddingSize = (
            int(field) for field in w2vFile.readline().split())

        wordIndexMap = {}
        # Only pay for the (count x size) matrix when it will be returned.
        embeddings = None
        if loadEmbeddings:
            embeddings = np.zeros((embeddingsCount, embeddingSize))

        message = 'Loading W2V embeddings: %(percentage)i%%. %(value)i embeddings %(size)i features each.'
        with progress.start(message, embeddingsCount) as update:
            embeddingIndex = 0

            while True:
                # Words must be consumed byte by byte: the vector data starts
                # right after the separating space, so reading ahead would
                # misplace the file position used by fromfile()/seek() below.
                rawWord = b''
                while True:
                    char = w2vFile.read(1)

                    if not char:
                        # End of file: whatever was accumulated (typically a
                        # trailing newline) is not a complete entry.
                        if loadEmbeddings:
                            return wordIndexMap, embeddings
                        return wordIndexMap

                    if char == b' ':
                        break

                    rawWord += char

                # strip() drops the newline that follows the previous vector.
                word = rawWord.strip()
                if not isinstance(word, str):
                    # Python 3: turn bytes into text keys. On Python 2 bytes
                    # is str, so the keys stay exactly as they were.
                    word = word.decode('utf-8')

                index = len(wordIndexMap)
                wordIndexMap[word] = index

                if loadEmbeddings:
                    embeddings[index] = np.fromfile(
                        w2vFile, dtype='float32', count=embeddingSize)
                else:
                    # Skip the vector: embeddingSize float32 values, 4 bytes each.
                    w2vFile.seek(embeddingSize * 4, io.SEEK_CUR)

                embeddingIndex += 1
                update(embeddingIndex, size=embeddingSize)
# Script excerpt (Python 2): reads every (id, sha1) row of the `commits`
# table into memory while reporting progress.
# NOTE(review): this excerpt had been collapsed onto one line; the statement
# layout below is reconstructed — confirm against the original file.
import dbutils
import gitutils
import progress

db = dbutils.Database()
cursor = db.cursor()

# commit id -> sha1, one entry per row fetched below.
commits = {}
# Every fetched commit id is added here; presumably consumed by code that
# follows this excerpt.
pending_commits = set()

# The row count becomes the total handed to progress.start().
cursor.execute("SELECT COUNT(*) FROM commits")

# NOTE(review): read as a bare Python 2 `print` statement (emits an empty
# line) rather than `print <expression>` — confirm.
print

progress.start(cursor.fetchone()[0], prefix="Fetching commits ...")

cursor.execute("SELECT id, sha1 FROM commits")

for commit_id, commit_sha1 in cursor:
    commits[commit_id] = commit_sha1
    pending_commits.add(commit_id)

    # One progress step per commit row.
    progress.update()

progress.end(" %d commits." % len(commits))

print

# The result of this query is not fetched within the excerpt; presumably it
# is read by the code that follows.
cursor.execute("SELECT MAX(CHARACTER_LENGTH(name)) FROM repositories")
batch = [] try: for line in open(os.path.join(repository.path, "packed-refs")): if not line.startswith("#"): try: sha1, ref = line.split() if len(sha1) == 40 and ref.startswith("refs/heads/"): refs[ref[11:]] = sha1 except ValueError: pass except IOError, error: if error.errno == errno.ENOENT: pass else: raise progress.start(len(branches), "Repository: %s" % repository.name) heads_path = os.path.join(repository.path, "refs", "heads") for branch_id, branch_name, branch_type, branch_base_id, branch_sha1 in branches: progress.update() try: try: repository_sha1 = open(os.path.join(heads_path, branch_name)).read().strip() except: repository_sha1 = refs.get(branch_name) if repository_sha1 != branch_sha1: progress.write("NOTE[%s]: %s differs (db:%s != repo:%s)" % (repository.name, branch_name, branch_sha1[:8], repository_sha1[:8])) if branch_type == "review": head = getReviewHead(repository, getReview(branch_id))
batch = [] try: for line in open(os.path.join(repository.path, "packed-refs")): if not line.startswith("#"): try: sha1, ref = line.split() if len(sha1) == 40 and ref.startswith("refs/heads/"): refs[ref[11:]] = sha1 except ValueError: pass except IOError as error: if error.errno == errno.ENOENT: pass else: raise progress.start(len(branches), "Repository: %s" % repository.name) heads_path = os.path.join(repository.path, "refs", "heads") branches_in_db = set() for branch_id, branch_name, branch_type, branch_base_id, branch_sha1 in branches: progress.update() branches_in_db.add(branch_name) try: try: repository_sha1 = open(os.path.join(heads_path, branch_name)).read().strip() except: repository_sha1 = refs.get(branch_name) if repository_sha1 != branch_sha1:
# Script excerpt (Python 2): reads every (id, sha1) row of the `commits`
# table into memory while reporting progress.
# NOTE(review): this excerpt had been collapsed onto one line; the statement
# layout below is reconstructed — confirm against the original file.
import dbutils
import gitutils
import progress

# Database handle obtained through the forSystem() constructor of dbutils.
db = dbutils.Database.forSystem()
cursor = db.cursor()

# commit id -> sha1, one entry per row fetched below.
commits = {}
# Every fetched commit id is added here; presumably consumed by code that
# follows this excerpt.
pending_commits = set()

# The row count becomes the total handed to progress.start().
cursor.execute("SELECT COUNT(*) FROM commits")

# NOTE(review): read as a bare Python 2 `print` statement (emits an empty
# line) rather than `print <expression>` — confirm.
print

progress.start(cursor.fetchone()[0], prefix="Fetching commits ...")

cursor.execute("SELECT id, sha1 FROM commits")

for commit_id, commit_sha1 in cursor:
    commits[commit_id] = commit_sha1
    pending_commits.add(commit_id)

    # One progress step per commit row.
    progress.update()

progress.end(" %d commits." % len(commits))

print

# The result of this query is not fetched within the excerpt; presumably it
# is read by the code that follows.
cursor.execute("SELECT MAX(CHARACTER_LENGTH(name)) FROM repositories")
with open("data/regions/" + region_name + ".dat", "r") as regionfile: categories = [x.strip().split("#")[0] for x in regionfile] #strip out empty lines and extra whitespace categories = [x.strip() for x in categories if x] ncat = len(categories) from itertools import combinations_with_replacement as cwr dicat = cwr(range(ncat), 2) dicat_names = cwr(categories, 2) cat1, cat2 = map(list, zip(*dicat)) c = ROOT.TCanvas() ROOT.gStyle.SetOptStat(0) progress.start(args.name) h2 = ROOT.TH2F(args.name, args.name, ncat, 0, ncat, ncat, 0, ncat) lt_h2 = {} pattern = re.compile(".*(LT.*?)-.*") oldfilename = "" #plot.ActivateBranches(chain, "smearerCat", common_cut) plot.ActivateBranches(chain, "smearerCat", "") #selector = ROOT.TTreeFormula("selector", str(tcut), chain) for i, entry in enumerate(chain): filename = entry.GetFile().GetName() if filename != oldfilename: #selector.UpdateFormulaLeaves() #if selector.EvalInstance() == False: continue oldfilename = filename