Example #1
0
def load(filePath, loadEmbeddings=True):
    """Read a word2vec-style binary file into memory.

    The file starts with a header line holding two whitespace-separated
    integers (number of embeddings, features per embedding). Every record
    after that is the bytes of a word, a single space, and then
    ``embeddingSize`` raw float32 values.

    Args:
        filePath: Path of the file to read.
        loadEmbeddings: When true the vectors are read as well; when false
            the vector bytes are skipped and only the vocabulary is built.

    Returns:
        ``(wordIndexMap, embeddings)`` when ``loadEmbeddings`` is true,
        otherwise only ``wordIndexMap``. ``wordIndexMap`` maps each word to
        the row it occupies in ``embeddings``, an array of shape
        ``(embeddingsCount, embeddingSize)``.

    Raises:
        ValueError: If the header does not consist of exactly two integers.
        UnicodeDecodeError: On Python 3, if a word is not valid UTF-8.
    """
    with open(filePath, 'rb') as w2vFile:
        # The file is opened in binary mode, so everything read from it is
        # bytes. split() without an argument and int() both accept bytes as
        # well as str, which keeps the header parsing version independent.
        embeddingsCount, embeddingSize = w2vFile.readline().split()
        embeddingsCount, embeddingSize = int(embeddingsCount), int(
            embeddingSize)
        wordIndexMap = {}

        # Only allocate the (potentially very large) matrix when the caller
        # actually wants the vectors back.
        embeddings = None
        if loadEmbeddings:
            embeddings = np.zeros((embeddingsCount, embeddingSize))

        # Size in bytes of one stored vector; used to skip over it.
        vectorBytes = embeddingSize * np.dtype('float32').itemsize

        with progress.start(
                'Loading W2V embeddings: %(percentage)i%%. %(value)i embeddings %(size)i features each.',
                embeddingsCount) as update:
            embeddingIndex = 0
            while True:
                # Collect the word byte by byte up to the separating space.
                word = b''
                while True:
                    char = w2vFile.read(1)

                    if not char:
                        # End of file: everything has been read.
                        if loadEmbeddings:
                            return wordIndexMap, embeddings
                        return wordIndexMap

                    if char == b' ':
                        break

                    word += char

                # Drops the line break that may follow the previous vector.
                word = word.strip()
                if not isinstance(word, str):
                    # Python 3: hand out native str keys. On Python 2 bytes
                    # is str already, so the word is left untouched.
                    word = word.decode('utf-8')

                rowIndex = len(wordIndexMap)
                wordIndexMap[word] = rowIndex
                if loadEmbeddings:
                    embeddings[rowIndex] = np.fromfile(w2vFile,
                                                       dtype='float32',
                                                       count=embeddingSize)
                else:
                    w2vFile.seek(vectorBytes, io.SEEK_CUR)

                embeddingIndex += 1

                update(embeddingIndex, size=embeddingSize)
Example #2
0
def load(filePath, loadEmbeddings=True):
    """Load the vocabulary (and optionally the vectors) of a binary W2V file.

    Layout read by this function: one header line with two
    whitespace-separated integers (embedding count, embedding size), then
    for every word its bytes, one space, and ``embeddingSize`` float32
    values stored back to back.

    Args:
        filePath: Location of the file.
        loadEmbeddings: If true, read the vectors too; if false, seek past
            them and build the vocabulary only.

    Returns:
        ``wordIndexMap`` alone when ``loadEmbeddings`` is false, otherwise
        the tuple ``(wordIndexMap, embeddings)``. ``wordIndexMap`` maps a
        word to its row in ``embeddings``, whose shape is
        ``(embeddingsCount, embeddingSize)``.

    Raises:
        ValueError: If the header is not made of exactly two integers.
        UnicodeDecodeError: On Python 3, if a word is not valid UTF-8.
    """
    with open(filePath, 'rb') as w2vFile:
        # Binary mode yields bytes; argument-less split() and int() work on
        # bytes and str alike, so the header parses on Python 2 and 3.
        countField, sizeField = w2vFile.readline().split()
        embeddingsCount, embeddingSize = int(countField), int(sizeField)
        wordIndexMap = {}

        # The matrix can be huge, so it is created only when it is returned.
        embeddings = np.zeros((embeddingsCount, embeddingSize)) if loadEmbeddings else None

        with progress.start('Loading W2V embeddings: %(percentage)i%%. %(value)i embeddings %(size)i features each.', embeddingsCount) as update:
            embeddingIndex = 0
            while True:
                word = b''
                # Read single bytes until the space that ends the word; the
                # iterator stops by itself when read() returns b'' at EOF.
                for char in iter(lambda: w2vFile.read(1), b''):
                    if char == b' ':
                        break
                    word += char
                else:
                    # No separator was found before the end of the file.
                    if loadEmbeddings:
                        return wordIndexMap, embeddings
                    return wordIndexMap

                # Removes the line break that may follow the previous vector.
                word = word.strip()
                if not isinstance(word, str):
                    # Python 3 only: convert to native str keys. On Python 2
                    # bytes and str are the same type, so nothing changes.
                    word = word.decode('utf-8')

                rowIndex = len(wordIndexMap)
                wordIndexMap[word] = rowIndex
                if loadEmbeddings:
                    embeddings[rowIndex] = np.fromfile(w2vFile, dtype='float32', count=embeddingSize)
                else:
                    # Four bytes per float32 value.
                    w2vFile.seek(embeddingSize * 4, io.SEEK_CUR)

                embeddingIndex += 1

                update(embeddingIndex, size=embeddingSize)
Example #3
0
import dbutils
import gitutils
import progress

# Script section: read every row of the "commits" table into memory while
# reporting progress through the project's `progress` module.

# One cursor is shared by all queries below, so each result has to be
# consumed before the next execute() call.
db = dbutils.Database()
cursor = db.cursor()

# commits maps commit id -> sha1; pending_commits collects the same ids.
# NOTE(review): pending_commits is only filled in this excerpt; presumably it
# is consumed by code further down the script -- confirm.
commits = {}
pending_commits = set()

# Row count of the table; it is handed to progress.start() below
# (presumably as the total number of steps -- confirm in progress module).
cursor.execute("SELECT COUNT(*) FROM commits")

# Bare `print` is the Python 2 print statement: it writes an empty line.
print

# fetchone() must run here, before the cursor is reused for the next query.
progress.start(cursor.fetchone()[0], prefix="Fetching commits ...")

cursor.execute("SELECT id, sha1 FROM commits")

# One progress.update() call per fetched row.
for commit_id, commit_sha1 in cursor:
    commits[commit_id] = commit_sha1
    pending_commits.add(commit_id)

    progress.update()

progress.end(" %d commits." % len(commits))

# Python 2 print statement again: writes an empty line.
print

# Length of the longest repository name; the result is not fetched within
# this excerpt.
cursor.execute("SELECT MAX(CHARACTER_LENGTH(name)) FROM repositories")
Example #4
0
    batch = []

    try:
        for line in open(os.path.join(repository.path, "packed-refs")):
            if not line.startswith("#"):
                try:
                    sha1, ref = line.split()
                    if len(sha1) == 40 and ref.startswith("refs/heads/"):
                        refs[ref[11:]] = sha1
                except ValueError:
                    pass
    except IOError, error:
        if error.errno == errno.ENOENT: pass
        else: raise

    progress.start(len(branches), "Repository: %s" % repository.name)

    heads_path = os.path.join(repository.path, "refs", "heads")

    for branch_id, branch_name, branch_type, branch_base_id, branch_sha1 in branches:
        progress.update()

        try:
            try: repository_sha1 = open(os.path.join(heads_path, branch_name)).read().strip()
            except: repository_sha1 = refs.get(branch_name)

            if repository_sha1 != branch_sha1:
                progress.write("NOTE[%s]: %s differs (db:%s != repo:%s)" % (repository.name, branch_name, branch_sha1[:8], repository_sha1[:8]))

                if branch_type == "review":
                    head = getReviewHead(repository, getReview(branch_id))
Example #5
0
    batch = []

    try:
        for line in open(os.path.join(repository.path, "packed-refs")):
            if not line.startswith("#"):
                try:
                    sha1, ref = line.split()
                    if len(sha1) == 40 and ref.startswith("refs/heads/"):
                        refs[ref[11:]] = sha1
                except ValueError:
                    pass
    except IOError as error:
        if error.errno == errno.ENOENT: pass
        else: raise

    progress.start(len(branches), "Repository: %s" % repository.name)

    heads_path = os.path.join(repository.path, "refs", "heads")

    branches_in_db = set()

    for branch_id, branch_name, branch_type, branch_base_id, branch_sha1 in branches:
        progress.update()

        branches_in_db.add(branch_name)

        try:
            try: repository_sha1 = open(os.path.join(heads_path, branch_name)).read().strip()
            except: repository_sha1 = refs.get(branch_name)

            if repository_sha1 != branch_sha1:
Example #6
0
import dbutils
import gitutils
import progress

# Script section: load all (id, sha1) pairs from the "commits" table and
# report progress through the project's `progress` module while doing so.

# A single cursor serves every query below; results therefore have to be
# read before the cursor is used for another execute().
db = dbutils.Database.forSystem()
cursor = db.cursor()

# commits: commit id -> sha1. pending_commits: set of the same commit ids.
# NOTE(review): nothing in this excerpt reads pending_commits; presumably
# later code in the script does -- confirm.
commits = {}
pending_commits = set()

# Number of rows in the table, passed to progress.start() below
# (presumably the total step count -- confirm in progress module).
cursor.execute("SELECT COUNT(*) FROM commits")

# Bare `print` is the Python 2 print statement: it writes an empty line.
print

# The count has to be fetched before the cursor runs the next query.
progress.start(cursor.fetchone()[0], prefix="Fetching commits ...")

cursor.execute("SELECT id, sha1 FROM commits")

# progress.update() is called once for every row read.
for commit_id, commit_sha1 in cursor:
    commits[commit_id] = commit_sha1
    pending_commits.add(commit_id)

    progress.update()

progress.end(" %d commits." % len(commits))

# Python 2 print statement again: writes an empty line.
print

# Longest repository name length; its result is not fetched in this excerpt.
cursor.execute("SELECT MAX(CHARACTER_LENGTH(name)) FROM repositories")
Example #7
0
# Script section: read the category list of a region file, build all
# category pairs and prepare the ROOT histogram that is filled from `chain`.
# `region_name`, `args`, `chain`, `plot`, `progress`, `ROOT` and `re` come
# from outside this excerpt.

# For every line keep only the text in front of the first "#".
with open("data/regions/" + region_name + ".dat", "r") as regionfile:
    categories = [x.strip().split("#")[0] for x in regionfile]

# Drop the entries that ended up empty (blank or comment-only lines) and trim
# the whitespace that was left in front of a trailing "#".
categories = [x.strip() for x in categories if x]

ncat = len(categories)

# All unordered pairs, a category paired with itself included: once as index
# pairs (dicat) and once as name pairs (dicat_names). Both are one-shot
# iterators.
from itertools import combinations_with_replacement as cwr
dicat = cwr(range(ncat), 2)
dicat_names = cwr(categories, 2)
# Split the index pairs into two parallel lists (first / second member).
# This consumes dicat.
cat1, cat2 = map(list, zip(*dicat))

c = ROOT.TCanvas()
# NOTE(review): in ROOT, SetOptStat(0) is the usual way to hide the
# statistics box -- confirm against the ROOT version in use.
ROOT.gStyle.SetOptStat(0)
progress.start(args.name)
# 2D histogram with ncat bins spanning [0, ncat) on each axis.
h2 = ROOT.TH2F(args.name, args.name, ncat, 0, ncat, ncat, 0, ncat)

# Filled outside this excerpt.
# NOTE(review): presumably one histogram per "LT..." tag -- confirm.
lt_h2 = {}

# Captures a substring that starts with "LT" and ends right before a
# following "-".
pattern = re.compile(".*(LT.*?)-.*")
oldfilename = ""
#plot.ActivateBranches(chain, "smearerCat", common_cut)
plot.ActivateBranches(chain, "smearerCat", "")
#selector = ROOT.TTreeFormula("selector", str(tcut), chain)
# Walk over the chain entries and notice when the underlying file changes;
# in the part visible here only oldfilename is updated on such a change.
for i, entry in enumerate(chain):
    filename = entry.GetFile().GetName()
    if filename != oldfilename:
        #selector.UpdateFormulaLeaves()
        #if selector.EvalInstance() == False: continue
        oldfilename = filename