def get_or_create_changeset(repo, hgrepo, ctx):
    """Return the Changeset row for Mercurial context *ctx*, creating it
    (and, recursively, any missing parent changesets) on demand.

    The changeset is always attached to ``repo.changesets`` before
    returning, so repeated calls are idempotent.

    Args:
        repo: Repository model instance; its ``changesets`` m2m is updated.
        hgrepo: Mercurial repository handle, passed through unchanged to
            recursive calls for parents.
        ctx: Mercurial change context; ``node()``, ``parents()``, ``user()``,
            ``description()``, ``branch()`` and ``files()`` are consulted.

    Returns:
        The existing or newly created Changeset instance.
    """
    try:
        cs = Changeset.objects.get(revision=ctx.node())
        repo.changesets.add(cs)
        return cs
    except Changeset.DoesNotExist:
        pass
    # Create the changeset, but first make sure every parent exists in the
    # DB, recursing for any that are missing.
    parent_revs = [parent.node() for parent in ctx.parents()]
    p_dict = dict(
        Changeset.objects.filter(revision__in=parent_revs).values_list(
            'revision', 'id'))
    for p in ctx.parents():
        if p.node() not in p_dict:
            p_cs = get_or_create_changeset(repo, hgrepo, p)
            p_dict[p_cs.revision] = p_cs.id
    cs = Changeset(revision=ctx.node())
    cs.user = ctx.user().decode('utf-8', 'replace')
    cs.description = ctx.description().decode('utf-8', 'replace')
    branch = ctx.branch()
    if branch != 'default':
        # 'default' is already set in the db, only change if needed
        dbb, __ = Branch.objects.get_or_create(name=branch)
        cs.branch = dbb
    # because the many-to-many relationships etc don't work until the object
    # has an ID
    cs.save()
    cs.parents.set(list(p_dict.values()))
    repo.changesets.add(cs, *(list(p_dict.values())))
    # Paths with a trailing blank are handled separately below: MySQL
    # ignores trailing spaces in comparisons.
    spacefiles = [p for p in ctx.files() if p.endswith(' ')]
    goodfiles = [p for p in ctx.files() if not p.endswith(' ')]
    if goodfiles:
        # chunk up the work on files,
        # mysql doesn't like them all at once
        chunk_count = len(goodfiles) // 1000 + 1
        chunk_size = len(goodfiles) // chunk_count
        if len(goodfiles) % chunk_size:
            chunk_size += 1
        for i in range(chunk_count):
            good_chunk = goodfiles[i * chunk_size:(i + 1) * chunk_size]
            existingfiles = File.objects.filter(path__in=good_chunk)
            existingpaths = existingfiles.values_list('path', flat=True)
            existingpaths = dict.fromkeys(existingpaths)
            missingpaths = [p for p in good_chunk if p not in existingpaths]
            File.objects.bulk_create([File(path=p) for p in missingpaths])
            good_ids = File.objects.filter(path__in=good_chunk)
            cs.files.add(*list(good_ids.values_list('pk', flat=True)))
    for path in spacefiles:
        # hack around mysql ignoring trailing ' ', and some of our
        # localizers checking in files with trailing ' ': re-check equality
        # in Python because the DB lookup can also match the stripped path.
        f = [fo for fo in File.objects.filter(path=path) if fo.path == path]
        if f:
            cs.files.add(f[0])
        else:
            # FIX: dropped the redundant f.save() that followed — create()
            # already INSERTs the row and nothing mutates f afterwards.
            f = File.objects.create(path=path)
            cs.files.add(f)
    cs.save()
    return cs
def get_or_create_changeset(repo, hgrepo, revision):
    # Return the Changeset row for `revision`, creating it — and,
    # recursively, any missing parent changesets — and attaching everything
    # to `repo.changesets` so repeated calls are idempotent.
    # NOTE(review): `xrange`, list-returning `filter`/`map` and integer `/`
    # below indicate Python 2 era code; do not run under Python 3 as-is.
    try:
        cs = Changeset.objects.get(revision=revision)
        repo.changesets.add(cs)
        return cs
    except Changeset.DoesNotExist:
        pass
    # create the changeset, but first, let's see if we need the parents
    ctx = hgrepo.changectx(revision)
    parents = map(lambda _cx: _cx.hex(), ctx.parents())
    # Map each already-known parent revision to its database id.
    p_dict = dict(Changeset.objects.filter(revision__in=parents).values_list("revision", "id"))
    for p in parents:
        if p not in p_dict:
            # Parent not in the DB yet: recurse to create it first.
            p_cs = get_or_create_changeset(repo, hgrepo, p)
            p_dict[p_cs.revision] = p_cs.id
    cs = Changeset(revision=revision)
    cs.user = ctx.user().decode("utf-8", "replace")
    cs.description = ctx.description().decode("utf-8", "replace")
    branch = ctx.branch()
    if branch != "default":
        # 'default' is already set in the db, only change if needed
        dbb, __ = Branch.objects.get_or_create(name=branch)
        cs.branch = dbb
    # because the many-to-many relationships etc don't work until the object
    # has an ID
    cs.save()
    cs.parents = p_dict.values()
    repo.changesets.add(cs, *(p_dict.values()))
    # Trailing-blank paths are handled separately below because MySQL
    # ignores trailing spaces in string comparisons.
    spacefiles = [p for p in ctx.files() if p.endswith(" ")]
    goodfiles = [p for p in ctx.files() if not p.endswith(" ")]
    if goodfiles:
        # chunk up the work on files,
        # mysql doesn't like them all at once
        chunk_count = len(goodfiles) / 1000 + 1
        chunk_size = len(goodfiles) / chunk_count
        if len(goodfiles) % chunk_size:
            chunk_size += 1
        for i in xrange(chunk_count):
            good_chunk = goodfiles[i * chunk_size : (i + 1) * chunk_size]
            existingfiles = File.objects.filter(path__in=good_chunk)
            existingpaths = existingfiles.values_list("path", flat=True)
            # dict.fromkeys gives O(1) membership tests for the filter below.
            existingpaths = dict.fromkeys(existingpaths)
            missingpaths = filter(lambda p: p not in existingpaths, good_chunk)
            # Bulk-insert the missing File rows with raw SQL in one round
            # trip; only the table name is interpolated, paths are bound
            # parameters.
            cursor = connection.cursor()
            cursor.executemany(
                "INSERT INTO %s (path) VALUES (%%s)" % File._meta.db_table, map(lambda p: (p,), missingpaths)
            )
            good_ids = File.objects.filter(path__in=good_chunk)
            cs.files.add(*list(good_ids.values_list("pk", flat=True)))
    for path in spacefiles:
        # hack around mysql ignoring trailing ' ', and some
        # of our localizers checking in files with trailing ' '.
        f = filter(lambda fo: fo.path == path, File.objects.filter(path=path))
        if f:
            cs.files.add(f[0])
        else:
            f = File.objects.create(path=path)
            cs.files.add(f)
            f.save()
    cs.save()
    return cs
def get_or_create_changeset(repo, hgrepo, revision):
    """Fetch the Changeset for `revision`, creating it on demand.

    Missing parent changesets are created first (recursively), the new
    changeset is linked to its parents, and everything is attached to
    `repo.changesets` so repeated calls are idempotent.
    """
    try:
        existing = Changeset.objects.get(revision=revision)
    except Changeset.DoesNotExist:
        pass
    else:
        repo.changesets.add(existing)
        return existing
    # Not in the DB yet: build it, parents first.
    ctx = hgrepo.changectx(revision)
    parent_hexes = [parent_ctx.hex() for parent_ctx in ctx.parents()]
    # Map each already-known parent revision to its database id.
    parent_ids = dict(
        Changeset.objects.filter(revision__in=parent_hexes).values_list(
            'revision', 'id'))
    for parent_rev in parent_hexes:
        if parent_rev in parent_ids:
            continue
        # Unknown parent: recurse so we can link to it below.
        parent_cs = get_or_create_changeset(repo, hgrepo, parent_rev)
        parent_ids[parent_cs.revision] = parent_cs.id
    cs = Changeset(revision=revision)
    cs.user = ctx.user().decode('utf-8', 'replace')
    cs.description = ctx.description().decode('utf-8', 'replace')
    branch = ctx.branch()
    if branch != 'default':
        # 'default' is already set in the db, only change if needed
        dbb, __ = Branch.objects.get_or_create(name=branch)
        cs.branch = dbb
    # Save first: the many-to-many relationships don't work until the
    # object has an ID.
    cs.save()
    cs.parents = parent_ids.values()
    repo.changesets.add(cs, *(parent_ids.values()))
    # Trailing-blank paths go through the slow path below because MySQL
    # ignores trailing spaces in comparisons.
    spacefiles = [p for p in ctx.files() if p.endswith(' ')]
    goodfiles = [p for p in ctx.files() if not p.endswith(' ')]
    if goodfiles:
        # Chunk up the work on files; mysql doesn't like them all at once.
        chunk_count = len(goodfiles) / 1000 + 1
        chunk_size = len(goodfiles) / chunk_count
        if len(goodfiles) % chunk_size:
            chunk_size += 1
        for i in xrange(chunk_count):
            chunk = goodfiles[i * chunk_size:(i + 1) * chunk_size]
            known_paths = dict.fromkeys(
                File.objects.filter(path__in=chunk).values_list(
                    'path', flat=True))
            to_insert = [p for p in chunk if p not in known_paths]
            # One raw-SQL round trip to create all missing File rows.
            cursor = connection.cursor()
            cursor.executemany(
                'INSERT INTO %s (path) VALUES (%%s)' % File._meta.db_table,
                [(p, ) for p in to_insert])
            chunk_rows = File.objects.filter(path__in=chunk)
            cs.files.add(*list(chunk_rows.values_list('pk', flat=True)))
    for path in spacefiles:
        # hack around mysql ignoring trailing ' ', and some
        # of our localizers checking in files with trailing ' '.
        exact = [fo for fo in File.objects.filter(path=path)
                 if fo.path == path]
        if exact:
            cs.files.add(exact[0])
        else:
            created = File.objects.create(path=path)
            cs.files.add(created)
            created.save()
    cs.save()
    return cs
def get_or_create_changeset(repo, hgrepo, ctx):
    """Return the Changeset row for Mercurial context *ctx*, creating it
    (and, recursively, any missing parent changesets) on demand.

    The changeset is always attached to ``repo.changesets`` before
    returning, so repeated calls are idempotent.

    Args:
        repo: Repository model instance; its ``changesets`` m2m is updated.
        hgrepo: Mercurial repository handle, passed through unchanged to
            recursive calls for parents.
        ctx: Mercurial change context; ``node()``, ``parents()``, ``user()``,
            ``description()``, ``branch()`` and ``files()`` are consulted.
            NOTE(review): ``ctx.files()`` is treated as bytes here (the
            ``b' '`` checks), yet the bytes paths are fed straight into the
            ORM's ``path`` lookups — confirm the File.path field accepts
            bytes or add a decode at this boundary.

    Returns:
        The existing or newly created Changeset instance.
    """
    try:
        cs = Changeset.objects.get(revision=ctx.node())
        repo.changesets.add(cs)
        return cs
    except Changeset.DoesNotExist:
        pass
    # Create the changeset, but first make sure every parent exists in the
    # DB, recursing for any that are missing.
    parent_revs = [parent.node() for parent in ctx.parents()]
    p_dict = dict(Changeset.objects
                  .filter(revision__in=parent_revs)
                  .values_list('revision', 'id'))
    for p in ctx.parents():
        if p.node() not in p_dict:
            p_cs = get_or_create_changeset(repo, hgrepo, p)
            p_dict[p_cs.revision] = p_cs.id
    cs = Changeset(revision=ctx.node())
    cs.user = ctx.user().decode('utf-8', 'replace')
    cs.description = ctx.description().decode('utf-8', 'replace')
    branch = ctx.branch()
    if branch != 'default':
        # 'default' is already set in the db, only change if needed
        dbb, __ = Branch.objects.get_or_create(name=branch)
        cs.branch = dbb
    # because the many-to-many relationships etc don't work until the object
    # has an ID
    cs.save()
    cs.parents.set(list(p_dict.values()))
    repo.changesets.add(cs, *(list(p_dict.values())))
    # Paths with a trailing blank are handled separately below: MySQL
    # ignores trailing spaces in comparisons.
    spacefiles = [p for p in ctx.files() if p.endswith(b' ')]
    goodfiles = [p for p in ctx.files() if not p.endswith(b' ')]
    if goodfiles:
        # chunk up the work on files,
        # mysql doesn't like them all at once
        chunk_count = len(goodfiles) // 1000 + 1
        chunk_size = len(goodfiles) // chunk_count
        if len(goodfiles) % chunk_size:
            chunk_size += 1
        for i in range(chunk_count):
            good_chunk = goodfiles[i * chunk_size:(i + 1) * chunk_size]
            existingfiles = File.objects.filter(path__in=good_chunk)
            existingpaths = existingfiles.values_list('path', flat=True)
            existingpaths = dict.fromkeys(existingpaths)
            missingpaths = [p for p in good_chunk if p not in existingpaths]
            File.objects.bulk_create([
                File(path=p) for p in missingpaths
            ])
            good_ids = File.objects.filter(path__in=good_chunk)
            cs.files.add(*list(good_ids.values_list('pk', flat=True)))
    for path in spacefiles:
        # hack around mysql ignoring trailing ' ', and some of our
        # localizers checking in files with trailing ' ': re-check equality
        # in Python because the DB lookup can also match the stripped path.
        f = [fo for fo in File.objects.filter(path=path) if fo.path == path]
        if f:
            cs.files.add(f[0])
        else:
            # FIX: dropped the redundant f.save() that followed — create()
            # already INSERTs the row and nothing mutates f afterwards.
            f = File.objects.create(path=path)
            cs.files.add(f)
    cs.save()
    return cs