Beispiel #1
0
    def fetch_gref(channel, identifier):
        """Fetch one issue thread and return it as a JSON response.

        A ``Gref`` is built for ``channel``/``identifier`` and handed to the
        GitHub read adaptor together with the station's crypto adaptor. The
        root object, every thread node, the tips and the signatures of the
        marshalled result are passed to ``jsonate``.
        """
        crypto = station.get_crypto_adaptor()
        reader = GithubReadAdaptor(station, channel)
        target = Gref(station.store, channel, identifier)
        log.info("Trying to fetch channel: %s identifier: %s" %
                 (channel, identifier))
        thread_data = reader.get_issue(target, crypto_adaptor=crypto)

        # The root is taken out of the marshalled "roots" collection.
        root_node = thread_data["roots"].pop()
        root_json = root_node.as_json()
        root_json["hash"] = oid2hex(pygit2.hash(root_node.as_object()))

        # Drain the thread from its end; each node gains its parents and hash.
        pending = thread_data["thread"]
        nodes = []
        while pending:
            entry = pending.pop()
            payload = json.loads(entry.data)
            payload["parents"] = list(entry.parents)
            payload["hash"] = oid2hex(pygit2.hash(entry.as_object()))
            nodes.append(payload)

        body = {
            "content": nodes,
            "root": root_json,
            "tips": thread_data["tips"],
            "signatures": thread_data["signatures"]
        }
        return jsonate(body, False)
Beispiel #2
0
 def hash_feature(self, feature, without_pk=False):
     """
     Return the hex git hash of the given feature, encoded using this schema.
     without_pk is forwarded to encode_feature; if it is True, the resulting hash doesn't depend on the feature's pk values.
     """
     encoded = self.encode_feature(feature, without_pk=without_pk)
     feature_oid = pygit2.hash(encoded)
     return feature_oid.hex
def spooled_update_matuc_config(course_pk):
    """Rewrite the course's matuc configuration file when its content changed.

    The course row is selected with ``select_for_update`` inside a
    transaction. Nothing is committed when the file already stored under the
    main ref has the same blob id as the freshly generated content.
    """
    with transaction.atomic():
        locked_courses = Course.objects.select_for_update(of=("self", ))
        course = locked_courses.get(pk=course_pk)
        new_content = course.generate_matuc_config()
        config_path = posixpath.normpath(
            posixpath.join(settings.MS_GIT_EDIT_SUBDIR,
                           settings.MS_MATUC_CONFIG_FILE))
        git_repo = pygit2.Repository(course.absolute_repository_path)
        browser = git_utils.ContentBrowser(git_repo, settings.MS_GIT_MAIN_REF)

        try:
            stored_id = browser[config_path].id
        except KeyError:
            # The file is not in the repository yet, so it must be written.
            stored_id = None

        if stored_id is not None and stored_id == pygit2.hash(new_content):
            # Identical blob id: the file is unchanged.
            return

        browser.add_from_bytes(config_path, new_content)
        new_commit = browser.commit(
            git_utils.create_admin_signature(),
            "Updated metadata",
            settings.MS_GIT_MAIN_REF,
        )
        # Update revision to trigger builds
        course.mark_material_updated(new_commit.hex)
        course.save()
Beispiel #4
0
    def handle_describechannels_for_missing_tip(self):
        """Feed handle_describechannels a tip whose object was never written.

        A root plus ten chained updates are written to the station. One more
        update is only hashed, not written, and announced as the tip.
        """
        test_id = u"tests/1"
        test_channel = u"test_channel"
        test_protocol = u"test_protocol"
        store = self.station.station

        gref = Gref(store.store, test_channel, test_id)

        root_obj = RootObject(test_id, test_channel, test_protocol)
        root_oid = store.write(root_obj.as_object())

        # Build the chain: each update points at the previously written one.
        parent_oid = root_oid
        for _ in xrange(10):
            update_obj = UpdateObject([parent_oid], "loldata")
            parent_oid = store.write(update_obj.as_object())

        # The final update is hashed only, so its object is absent locally.
        update_obj = UpdateObject([parent_oid], "loldata")
        missing_tip = binascii.hexlify(pygit2.hash(update_obj.as_object()))

        store.update_gref(gref, [Tip(root_oid, "")])
        self.assertEqual([root_oid], gref.tips())

        self.station.payload = _payload(update_obj, gref, [missing_tip])
        handle_describechannels(self.station)
        self.assertEqual([missing_tip], gref.tips())
    def handle_describechannels_for_missing_tip(self):
        """Announce a tip that exists only as a hash and check the gref tips.

        The gref initially points at the written root; the payload handed to
        handle_describechannels names an update that was hashed but not
        written to the station.
        """
        test_id = u"tests/1"
        test_channel = u"test_channel"
        test_protocol = u"test_protocol"
        backend = self.station.station

        gref = Gref(backend.store, test_channel, test_id)

        root_obj = RootObject(test_id, test_channel, test_protocol)
        root_oid = backend.write(root_obj.as_object())

        # Ten written updates, each one referencing its predecessor.
        latest_oid = root_oid
        for _ in xrange(10):
            update_obj = UpdateObject([latest_oid], "loldata")
            latest_oid = backend.write(update_obj.as_object())

        # One further update whose id is computed without writing the object.
        update_obj = UpdateObject([latest_oid], "loldata")
        unwritten_oid = binascii.hexlify(
            pygit2.hash(update_obj.as_object()))

        backend.update_gref(gref, [root_oid])
        self.assertEqual([root_oid], gref.tips())

        self.station.payload = _payload(update_obj, gref, [unwritten_oid])
        handle_describechannels(self.station)
        self.assertEqual([unwritten_oid], gref.tips())
Beispiel #6
0
    def add(self, path, contents, mode=None):
        """Stash ``contents`` under ``path`` with the given file mode.

        ``mode`` holds the permission digits and falls back to ``'644'``.
        Unicode contents are encoded as UTF-8 before hashing. The stash maps
        the cleaned, encoded path to ``(hash, mode)`` and the contents are
        added to ``self._contents``.
        """
        self._assert_revision()
        stash_key = clean_path(path).encode('UTF-8')
        permission_digits = str(mode or '644')
        file_mode = int('0100{0}'.format(permission_digits), 0)

        blob = contents.encode('UTF-8') if isinstance(contents, unicode) else contents

        self._stash[stash_key] = (pygit2.hash(blob), file_mode)
        self._contents.add(blob)
Beispiel #7
0
    def fetch_gref(channel, identifier):
        """Return the issue thread for ``channel``/``identifier`` as JSON.

        The marshalled issue comes from the GitHub read adaptor; its root
        object and every thread node are serialised together with their
        hashes, and the tips are passed through unchanged.
        """
        reader = github_protocol.GithubReadAdaptor(station, channel)
        target = Gref(station.store, channel, identifier)
        log.info("Trying to fetch channel: %s identifier: %s" %
                (channel, identifier))
        issue = reader.get_issue(target)

        # The root is taken out of the marshalled "roots" collection.
        root_node = issue["roots"].pop()
        root_json = root_node.as_json()
        root_json["hash"] = oid2hex(pygit2.hash(root_node.as_object()))

        # Drain the thread from its end, decoding one node at a time.
        remaining = issue["thread"]
        nodes = []
        while remaining:
            entry = remaining.pop()
            decoded = json.loads(entry.data)
            decoded["parents"] = list(entry.parents)
            decoded["hash"] = oid2hex(pygit2.hash(entry.as_object()))
            nodes.append(decoded)

        body = {"content": nodes, "root": root_json, "tips": issue["tips"]}
        return jsonate(body, False)
Beispiel #8
0
def handle_transfer(self):
    """Store a git object received through a TRANSFER message.

    The payload is parsed as a ``GitObject``. The transfer has to answer a
    request registered under ``self.id``; for blobs, the hash of the received
    data must equal the payload of that request before anything is written.

    Raises:
        UnsolicitedTransfer: no request is registered for ``self.id``.
        AssertionError: a blob was received whose hash differs from the
            payload of the matching request.
    """
    git_pb = GitObject()
    git_pb.ParseFromString(self.payload)
    log.info("Handling TRANSFER of %s" % (git_pb.type))

    try:
        req = self.station.get_request(self.id)
    except KeyError:
        raise UnsolicitedTransfer

    if git_pb.type == pygit2.GIT_OBJ_BLOB:
        data_hash = utils.oid2hex(pygit2.hash(git_pb.data))
        if req.payload != data_hash:
            # Raised explicitly rather than through ``assert`` so the check
            # on received data also runs when Python is started with -O.
            raise AssertionError(
                "Attempted to be sent invalid object for %s; got %s" % (req.payload, data_hash))
    ret = self.station.store.write(git_pb.type, git_pb.data)
    log.info("Wrote object %s into local db" % logger.fix_oid(ret))
Beispiel #9
0
def handle_transfer(self):
    """Write the git object carried by a TRANSFER message to the local store.

    ``self.payload`` is parsed as a ``GitObject``. A request must be
    registered under ``self.id``; when the object is a blob, the hash of its
    data is compared with the payload of that request first.

    Raises:
        UnsolicitedTransfer: no request is registered for ``self.id``.
        AssertionError: the hash of a received blob does not match the
            payload of the request it answers.
    """
    git_pb = GitObject()
    git_pb.ParseFromString(self.payload)
    log.info("Handling TRANSFER of %s" % (git_pb.type))

    try:
        req = self.station.get_request(self.id)
    except KeyError:
        raise UnsolicitedTransfer

    if git_pb.type == pygit2.GIT_OBJ_BLOB:
        data_hash = utils.oid2hex(pygit2.hash(git_pb.data))
        if req.payload != data_hash:
            # An ``assert`` statement is removed under -O; raising the same
            # exception type explicitly keeps the validation in place.
            raise AssertionError(
                "Attempted to be sent invalid object for %s; got %s" % (req.payload, data_hash))
    ret = self.station.store.write(git_pb.type, git_pb.data)
    log.info("Wrote object %s into local db" % logger.fix_oid(ret))
    def test_transfer_object(self):
        """Transfer a blob that answers a registered request, then look it up."""
        self.station.set_real_id(True)

        body = "foo bar baz butts lol"
        body_oid = pygit2.hash(body)

        # Register a request whose payload is the hex id of the expected blob.
        request = MockRequest(self.station.id)
        request.payload = oid2hex(body_oid)
        self.station.station.registry.register(request)

        message = GitObject()
        message.data = body
        message.type = pygit2.GIT_OBJ_BLOB
        self.station.payload = message.SerializeToString()
        handle_transfer(self.station)

        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this only checks that the lookup result is truthy.
        self.assertTrue(self.station.station[oid2hex(body_oid)], body)
Beispiel #11
0
    def test_transfer_object(self):
        """Send a requested blob through handle_transfer and fetch it back."""
        self.station.set_real_id(True)

        blob_text = "foo bar baz butts lol"
        blob_oid = pygit2.hash(blob_text)

        # The pending request asks for exactly this blob, by hex id.
        pending = MockRequest(self.station.id)
        pending.payload = oid2hex(blob_oid)
        self.station.station.registry.register(pending)

        transfer = GitObject()
        transfer.data = blob_text
        transfer.type = pygit2.GIT_OBJ_BLOB
        self.station.payload = transfer.SerializeToString()
        handle_transfer(self.station)

        # NOTE(review): the second argument of assertTrue is the failure
        # message; the stored object is only checked for truthiness here.
        self.assertTrue(self.station.station[oid2hex(blob_oid)], blob_text)
Beispiel #12
0
    def create_edge(self, edge, **obj):
        """Stage a new edge together with its lookup and index entries.

        :param edge: a string or a :py:class:`Edge` subclass reference
        :param ``**obj``: the field values; ``uuid`` is generated when absent
        :returns: an instance of the given edge

        """
        edge = resolve_edge_name(edge)

        # generate_uuid() is evaluated even when the caller supplied a uuid.
        edge_uuid = obj.pop('uuid', generate_uuid())
        obj['uuid'] = edge_uuid

        serialized = self.serialize(obj)
        object_hash = bytes(pygit2.hash(serialized))

        # One entry per field, keyed by the hash of the serialized object.
        # Neither collection is read again within this method.
        predicate_ids = []
        indexes = {}
        for field, value in obj.items():
            if edge_has_index(edge, field):
                indexes[field] = value
            field_path = os.path.join(edge, 'indexes', field)
            predicate_ids.append(self.add_spo(field_path, object_hash, value))

        # The payload itself plus both directions of the uuid <-> hash lookup.
        self.add_spo(os.path.join(edge, 'objects'), object_hash, serialized)
        self.add_spo(os.path.join(edge, '_ids'), edge_uuid, object_hash)
        self.add_spo(os.path.join(edge, '_uuids'), object_hash, edge_uuid)

        return Edge.from_data(edge, **obj)
Beispiel #13
0
def locked_git_index(extension_name):
    """
    Returns an empty index file, but extended with a required extension in the extensions section of the index binary
    format. (Not the file extension - the filename is simply "index", it has no file extension.)
    Causes all git commands that would involve the index or working copy to fail with "unsupported extension: NAME" -
    where name is "kart" or ".sno", giving the user a clue as to which command they *should* be using.
    In that sense it is "locked" to git. Various techniques can be used to unlock it if certain git functionality is
    needed - eg marking the repository as bare so it is ignored, or removing the unsupported extension.

    extension_name must be exactly 4 bytes and must not start with A-Z (AssertionError otherwise).
    The result is the 12-byte header, the 8-byte extension record and the 20-byte SHA-1 checksum of those 20 bytes.
    """
    import hashlib

    assert isinstance(extension_name, bytes)
    assert len(extension_name) == 4
    first_char = extension_name[0]
    assert not (ord("A") <= first_char <= ord("Z"))

    GIT_INDEX_VERSION = 2
    # Signature, version and an entry count of zero.
    BASE_EMPTY_GIT_INDEX = struct.pack(">4sII", b"DIRC", GIT_INDEX_VERSION, 0)

    # Extension name must not start with A-Z, therefore is a required extension.
    # See https://git-scm.com/docs/index-format

    # Extension signature followed by a zero-length payload.
    extension = struct.pack(">4sI", extension_name, 0)
    data = BASE_EMPTY_GIT_INDEX + extension
    # Append checksum to the end. The index format asks for the SHA-1 of the
    # preceding bytes; pygit2.hash() would give a blob object id instead,
    # which is not the same value.
    return data + hashlib.sha1(data).digest()
Beispiel #14
0
    def create(self, subject, **obj):
        """Stage a new subject, its lookup entries and one entry per field.

        :param subject: a string or a :py:class:`Subject` subclass reference
        :param ``**obj``: the field values
        :return: an instance of the given subject

        """
        subject = resolve_subject(subject)
        # A fresh uuid is only used for the lookup entries below; it is not
        # written back into ``obj``.
        subject_uuid = obj.get('uuid', uuid4().hex)

        serialized = self.serialize(obj)
        object_hash = bytes(pygit2.hash(serialized))

        # The payload itself plus both directions of the uuid <-> hash lookup.
        self.add_spo(os.path.join(subject, 'objects'), object_hash, serialized)
        self.add_spo(os.path.join(subject, '_ids'), subject_uuid, object_hash)
        self.add_spo(os.path.join(subject, '_uuids'), object_hash, subject_uuid)

        # Neither collection is read again within this method.
        predicate_ids = []
        indexes = {}
        for field, value in obj.items():
            if subject_has_index(subject, field):
                indexes[field] = value
            field_path = os.path.join(subject, 'indexes', field)
            predicate_ids.append(self.add_spo(field_path, object_hash, value))

        node = Subject.from_data(subject, **obj)

        query = ' '.join(bytes(part) for part in ('CREATE', node))
        self.queries.append(query)
        return node
Beispiel #15
0
 def test_hash(self):
     """pygit2.hash must give the id that create_blob returns for the same data."""
     payload = "foobarbaz"
     computed_id = pygit2.hash(payload)
     stored_id = self.repo.create_blob(payload)
     self.assertEqual(computed_id, stored_id)
Beispiel #16
0
 def test_hash(self):
     """The id from pygit2.hash equals the id of the blob written from the same data."""
     payload = "foobarbaz"
     # Left operand first: the hash is computed before the blob is written.
     assert pygit2.hash(payload) == self.repo.create_blob(payload)
Beispiel #17
0
 def test_hash(self):
     """Hashing data without writing it yields the id create_blob assigns."""
     content = "foobarbaz"
     # Arguments are evaluated left to right: hash first, then the write.
     self.assertEqual(pygit2.hash(content), self.repo.create_blob(content))
Beispiel #18
0
    def diff_db_to_tree(self, dataset, pk_filter=UNFILTERED):
        """
        Generates a diff between a working copy DB and the underlying repository tree,
        for a single dataset only.

        Pass a list of PK values to filter results to them; a falsy pk_filter
        is treated as UNFILTERED.

        Returns a diff.Diff for the dataset holding the meta differences plus
        the inserts, deletes and updates found through the tracking table.
        """
        pk_filter = pk_filter or UNFILTERED
        with self.session() as db:
            dbcur = db.cursor()

            table = dataset.name

            # Meta items: repository values (old) versus working copy values (new).
            meta_diff = {}
            meta_old = {
                key: dataset.get_meta_item(key)
                for key in GPKG_META_ITEMS
            }
            meta_new = dict(self.read_meta(dataset))
            # NOTE(review): the symmetric difference only visits names present
            # on exactly one side; a name present on both sides is never
            # compared here - confirm this is intended.
            for name in set(meta_new.keys()) ^ set(meta_old.keys()):
                v_old = meta_old.get(name)
                v_new = meta_new.get(name)
                if v_old or v_new:
                    meta_diff[name] = (v_old, v_new)

            pk_field = dataset.primary_key

            # Every tracked pk of this table, left-joined to the current row
            # (all row columns are NULL when no row has that pk any more).
            diff_sql = f"""
                SELECT
                    {self.TRACKING_TABLE}.pk AS ".__track_pk",
                    {gpkg.ident(table)}.*
                FROM {self.TRACKING_TABLE} LEFT OUTER JOIN {gpkg.ident(table)}
                ON ({self.TRACKING_TABLE}.pk = {gpkg.ident(table)}.{gpkg.ident(pk_field)})
                WHERE ({self.TRACKING_TABLE}.table_name = ?)
            """
            params = [table]
            if pk_filter is not UNFILTERED:
                # One placeholder per requested pk; the values are bound as strings.
                diff_sql += f"\nAND {self.TRACKING_TABLE}.pk IN ({','.join(['?']*len(pk_filter))})"
                params += [str(pk) for pk in pk_filter]
            dbcur.execute(diff_sql, params)

            # Inserts and deletes are grouped by blob hash so that identical
            # features can be paired up afterwards; updates are keyed by tracked pk.
            candidates_ins = collections.defaultdict(list)
            candidates_upd = {}
            candidates_del = collections.defaultdict(list)

            for row in dbcur:
                # Column 0 is the tracked pk selected as ".__track_pk".
                track_pk = row[0]
                db_obj = {k: row[k] for k in row.keys() if k != ".__track_pk"}

                try:
                    repo_obj = dataset.get_feature(track_pk, ogr_geoms=False)
                except KeyError:
                    repo_obj = None

                if db_obj[pk_field] is None:
                    # No row in the working copy: a delete, provided the
                    # repository has the feature.
                    if repo_obj:  # ignore INSERT+DELETE
                        blob_hash = pygit2.hash(
                            dataset.encode_feature_blob(repo_obj)).hex
                        candidates_del[blob_hash].append((track_pk, repo_obj))
                    continue

                elif not repo_obj:
                    # INSERT
                    blob_hash = pygit2.hash(
                        dataset.encode_feature_blob(db_obj)).hex
                    candidates_ins[blob_hash].append(db_obj)

                else:
                    # UPDATE
                    # Kept only when at least one (field, value) pair differs.
                    s_old = set(repo_obj.items())
                    s_new = set(db_obj.items())
                    if s_old ^ s_new:
                        candidates_upd[track_pk] = (repo_obj, db_obj)

            # detect renames
            # For each blob hash seen on both sides, one delete is paired with
            # one insert and recorded as an update under the deleted pk.
            for h in list(candidates_del.keys()):
                if h in candidates_ins:
                    track_pk, repo_obj = candidates_del[h].pop(0)
                    db_obj = candidates_ins[h].pop(0)

                    candidates_upd[track_pk] = (repo_obj, db_obj)

                    # Drop hash buckets that the pairing has emptied.
                    if not candidates_del[h]:
                        del candidates_del[h]
                    if not candidates_ins[h]:
                        del candidates_ins[h]

            # Flatten the per-hash buckets: inserts become a list of features,
            # deletes a dict built from the (pk, feature) pairs.
            return diff.Diff(
                dataset,
                meta=meta_diff,
                inserts=list(itertools.chain(*candidates_ins.values())),
                deletes=dict(itertools.chain(*candidates_del.values())),
                updates=candidates_upd,
            )
Beispiel #19
0
    def changeset(self, commit):
        """Describe ``commit`` in the store graph.

        Returns immediately unless the Provenance or the Persistence feature
        is enabled. With Provenance, the commit is added as a PROV activity
        (import/transformation markers, timestamps, label, author and
        committer associations, parent commits) and the graph differences to
        its first parent are recorded. For every file of the commit that is
        mapped to a graph, an entity is described; with Persistence the
        triples of that file are also copied into a graph named after the
        blob.

        :param commit: the commit to describe; ``id``, ``properties``,
            ``author``, ``committer``, ``message``, ``parents`` and
            ``node()`` are read from it.
        """
        with_provenance = self.config.hasFeature(Feature.Provenance)
        with_persistence = self.config.hasFeature(Feature.Persistence)
        if not (with_provenance or with_persistence):
            return

        g = self.store.store

        if with_provenance:
            role_author_uri = QUIT['Author']
            role_committer_uri = QUIT['Committer']

            g.add((role_author_uri, is_a, PROV['Role']))
            g.add((role_committer_uri, is_a, PROV['Role']))

        # Create the commit
        i1, commitid = self.instance(commit.id, True)

        commit_uri = QUIT['commit-' + commit.id]

        if with_provenance:
            g.add((commit_uri, is_a, PROV['Activity']))

            if 'Source' in commit.properties.keys():
                g.add((commit_uri, is_a, QUIT['Import']))
                sources = commit.properties['Source'].strip()
                # Each <...> group in the property is one data source.
                for source in re.findall("<.*?>", sources):
                    g.add((commit_uri, QUIT['dataSource'], URIRef(source.strip("<>"))))
            if 'Query' in commit.properties.keys():
                g.add((commit_uri, is_a, QUIT['Transformation']))
                g.add((commit_uri, QUIT['query'], Literal(
                    commit.properties['Query'].strip())))

            g.add((commit_uri, QUIT['hex'], Literal(commit.id)))
            g.add((commit_uri, PROV['startedAtTime'], Literal(
                git_timestamp(commit.author.time, commit.author.offset),
                datatype=XSD.dateTime)))
            g.add((commit_uri, PROV['endedAtTime'], Literal(
                git_timestamp(commit.committer.time, commit.committer.offset),
                datatype=XSD.dateTime)))
            g.add((commit_uri, RDFS['label'],
                   Literal(commit.message.strip())))

            # Author: the agent URI is derived from the hash of the email.
            author_hash = pygit2.hash(commit.author.email).hex
            author_uri = QUIT['user-' + author_hash]
            g.add((commit_uri, PROV['wasAssociatedWith'], author_uri))

            g.add((author_uri, is_a, PROV['Agent']))
            g.add((author_uri, RDFS.label, Literal(commit.author.name)))
            g.add((author_uri, FOAF.mbox, Literal(commit.author.email)))

            q_author_uri = BNode()
            g.add((commit_uri, PROV['qualifiedAssociation'], q_author_uri))
            g.add((q_author_uri, is_a, PROV['Association']))
            g.add((q_author_uri, PROV['agent'], author_uri))
            # NOTE(review): every other role statement in this method uses
            # PROV['hadRole']; confirm whether PROV['role'] is intended.
            g.add((q_author_uri, PROV['role'], role_author_uri))

            if commit.author.name != commit.committer.name:
                # Committer
                committer_hash = pygit2.hash(commit.committer.email).hex
                committer_uri = QUIT['user-' + committer_hash]
                g.add((commit_uri, PROV['wasAssociatedWith'], committer_uri))

                g.add((committer_uri, is_a, PROV['Agent']))
                g.add((committer_uri, RDFS.label, Literal(commit.committer.name)))
                g.add((committer_uri, FOAF.mbox, Literal(commit.committer.email)))

                q_committer_uri = BNode()
                g.add(
                    (commit_uri, PROV['qualifiedAssociation'], q_committer_uri))
                g.add((q_committer_uri, is_a, PROV['Association']))
                # The committer association refers to the committer agent.
                g.add((q_committer_uri, PROV['agent'], committer_uri))
                g.add((q_committer_uri, PROV['hadRole'], role_committer_uri))
            else:
                # Same person: the author association also carries the
                # committer role.
                g.add((q_author_uri, PROV['hadRole'], role_committer_uri))

            # Parents
            for parent in iter(commit.parents or []):
                parent_uri = QUIT['commit-' + parent.id]
                g.add((commit_uri, QUIT["preceedingCommit"], parent_uri))
                g.add((commit_uri, PROV["wasInformedBy"], parent_uri))

            # Diff against the first parent (or against nothing for a root).
            parent = next(iter(commit.parents or []), None)

            i2, commitid = self.instance(parent.id, True) if parent else (None, None)

            delta = graphdiff(i2.store if i2 else None, i1.store)

            for index, (iri, changesets) in enumerate(delta.items()):
                update_uri = QUIT['update-{}-{}'.format(commit.id, index)]
                g.add((update_uri, QUIT['graph'], iri))
                g.add((commit_uri, QUIT['updates'], update_uri))
                for (op, triples) in changesets:
                    op_uri = QUIT[op + '-' + commit.id]
                    g.add((update_uri, QUIT[op], op_uri))
                    g.addN((s, p, o, op_uri) for s, p, o in triples)

        # Entities
        if commit.id not in self._graphconfigs:
            self.updateGraphConfig(commit.id)

        graph_config = self._graphconfigs.get(commit.id)
        graph_files = graph_config.getgraphurifilemap()

        for entity in commit.node().entries(recursive=True):
            # todo check if file was changed
            if not entity.is_file:
                continue
            if entity.name not in graph_files.values():
                continue

            graphUri = graph_config.getgraphuriforfile(entity.name)
            blob = (entity.name, entity.oid)

            try:
                f, context = self.getFileReferenceAndContext(blob, commit)
            except KeyError:
                # Nothing stored for this blob yet: parse the file, store the
                # result and use the parsed graph as the context so that
                # ``context`` is always bound for the statements below.
                context = Graph(identifier=graphUri)
                context.parse(data=entity.content, format='nt')

                self._blobs.set(
                    blob, (FileReference(entity.name, entity.content), context)
                )

            private_uri = QUIT["graph-{}".format(entity.oid)]

            # At least one of the two features is enabled at this point
            # (see the early return), so the entity is always described.
            g.add((private_uri, is_a, PROV['Entity']))
            g.add(
                (private_uri, PROV['specializationOf'], context.identifier))
            g.add(
                (private_uri, PROV['wasGeneratedBy'], commit_uri))

            q_usage = BNode()
            g.add((private_uri, PROV['qualifiedGeneration'], q_usage))
            g.add((q_usage, is_a, PROV['Generation']))
            g.add((q_usage, PROV['activity'], commit_uri))

            prev = next(entity.history(), None)
            if prev:
                # NOTE(review): ``index`` is the variable left over from the
                # diff loop above; it is unbound when that loop did not run
                # (Provenance disabled or empty delta), and this URI pattern
                # differs from ``private_uri``. Confirm the intended URI.
                prev_uri = QUIT["graph-{}-{}".format(prev.oid, index)]
                g.add((private_uri, PROV['wasDerivedFrom'], prev_uri))
                g.add((commit_uri, PROV['used'], prev_uri))

                q_derivation = BNode()
                g.add((private_uri, PROV['qualifiedDerivation'], q_derivation))
                g.add((q_derivation, is_a, PROV['Derivation']))
                g.add((q_derivation, PROV['entity'], prev_uri))
                g.add((q_derivation, PROV['hadActivity'], commit_uri))

            if with_persistence:
                # Copy all triples of the file into the blob-specific graph.
                g.addN((s, p, o, private_uri) for s, p, o
                       in context.triples((None, None, None)))
Beispiel #20
0
    def changeset(self, commit):
        """Add provenance and/or persistence statements for ``commit``.

        Nothing happens unless the Provenance or the Persistence feature is
        enabled. With Provenance, the commit becomes a PROV activity with
        import/transformation markers, timestamps, label, author and
        committer associations and links to its parents, and the graph
        differences to the first parent are recorded. Each file of the commit
        that is mapped to a graph is described as an entity; with Persistence
        its triples are also copied into a graph named after the blob.

        :param commit: the commit to describe; ``id``, ``properties``,
            ``author``, ``committer``, ``message``, ``parents`` and
            ``node()`` are read from it.
        """
        with_provenance = self.config.hasFeature(Feature.Provenance)
        with_persistence = self.config.hasFeature(Feature.Persistence)
        if not (with_provenance or with_persistence):
            return

        g = self.store.store

        if with_provenance:
            role_author_uri = QUIT['Author']
            role_committer_uri = QUIT['Committer']

            g.add((role_author_uri, is_a, PROV['Role']))
            g.add((role_committer_uri, is_a, PROV['Role']))

        # Create the commit
        i1, commitid = self.instance(commit.id, True)

        commit_uri = QUIT['commit-' + commit.id]

        if with_provenance:
            g.add((commit_uri, is_a, PROV['Activity']))

            if 'Source' in commit.properties.keys():
                g.add((commit_uri, is_a, QUIT['Import']))
                sources = commit.properties['Source'].strip()
                # Each <...> group in the property is one data source.
                for source in re.findall("<.*?>", sources):
                    g.add((commit_uri, QUIT['dataSource'],
                           URIRef(source.strip("<>"))))
            if 'Query' in commit.properties.keys():
                g.add((commit_uri, is_a, QUIT['Transformation']))
                g.add((commit_uri, QUIT['query'],
                       Literal(commit.properties['Query'].strip())))

            g.add((commit_uri, QUIT['hex'], Literal(commit.id)))
            g.add((commit_uri, PROV['startedAtTime'],
                   Literal(git_timestamp(commit.author.time,
                                         commit.author.offset),
                           datatype=XSD.dateTime)))
            g.add((commit_uri, PROV['endedAtTime'],
                   Literal(git_timestamp(commit.committer.time,
                                         commit.committer.offset),
                           datatype=XSD.dateTime)))
            g.add((commit_uri, RDFS['label'], Literal(commit.message.strip())))

            # Author: the agent URI is derived from the hash of the email.
            author_hash = pygit2.hash(commit.author.email).hex
            author_uri = QUIT['user-' + author_hash]
            g.add((commit_uri, PROV['wasAssociatedWith'], author_uri))

            g.add((author_uri, is_a, PROV['Agent']))
            g.add((author_uri, RDFS.label, Literal(commit.author.name)))
            g.add((author_uri, FOAF.mbox, Literal(commit.author.email)))

            q_author_uri = BNode()
            g.add((commit_uri, PROV['qualifiedAssociation'], q_author_uri))
            g.add((q_author_uri, is_a, PROV['Association']))
            g.add((q_author_uri, PROV['agent'], author_uri))
            # NOTE(review): every other role statement in this method uses
            # PROV['hadRole']; confirm whether PROV['role'] is intended.
            g.add((q_author_uri, PROV['role'], role_author_uri))

            if commit.author.name != commit.committer.name:
                # Committer
                committer_hash = pygit2.hash(commit.committer.email).hex
                committer_uri = QUIT['user-' + committer_hash]
                g.add((commit_uri, PROV['wasAssociatedWith'], committer_uri))

                g.add((committer_uri, is_a, PROV['Agent']))
                g.add((committer_uri, RDFS.label,
                       Literal(commit.committer.name)))
                g.add((committer_uri, FOAF.mbox,
                       Literal(commit.committer.email)))

                q_committer_uri = BNode()
                g.add((commit_uri, PROV['qualifiedAssociation'],
                       q_committer_uri))
                g.add((q_committer_uri, is_a, PROV['Association']))
                # The committer association refers to the committer agent.
                g.add((q_committer_uri, PROV['agent'], committer_uri))
                g.add((q_committer_uri, PROV['hadRole'], role_committer_uri))
            else:
                # Same person: the author association also carries the
                # committer role.
                g.add((q_author_uri, PROV['hadRole'], role_committer_uri))

            # Parents
            for parent in iter(commit.parents or []):
                parent_uri = QUIT['commit-' + parent.id]
                g.add((commit_uri, QUIT["preceedingCommit"], parent_uri))
                g.add((commit_uri, PROV["wasInformedBy"], parent_uri))

            # Diff against the first parent (or against nothing for a root).
            parent = next(iter(commit.parents or []), None)

            if parent:
                i2, commitid = self.instance(parent.id, True)
            else:
                i2, commitid = None, None

            delta = graphdiff(i2.store if i2 else None, i1.store)

            for index, (iri, changesets) in enumerate(delta.items()):
                update_uri = QUIT['update-{}-{}'.format(commit.id, index)]
                g.add((update_uri, QUIT['graph'], iri))
                g.add((commit_uri, QUIT['updates'], update_uri))
                for (op, triples) in changesets:
                    op_uri = QUIT[op + '-' + commit.id]
                    g.add((update_uri, QUIT[op], op_uri))
                    g.addN((s, p, o, op_uri) for s, p, o in triples)

        # Entities
        if commit.id not in self._graphconfigs:
            self.updateGraphConfig(commit.id)

        graph_config = self._graphconfigs.get(commit.id)
        graph_files = graph_config.getgraphurifilemap()

        for entity in commit.node().entries(recursive=True):
            # todo check if file was changed
            if not entity.is_file:
                continue
            if entity.name not in graph_files.values():
                continue

            graphUri = graph_config.getgraphuriforfile(entity.name)
            blob = (entity.name, entity.oid)

            try:
                f, context = self.getFileReferenceAndContext(blob, commit)
            except KeyError:
                # Nothing stored for this blob yet: parse the file, store the
                # result and use the parsed graph as the context so that
                # ``context`` is always bound for the statements below.
                context = Graph(identifier=graphUri)
                context.parse(data=entity.content, format='nt')

                self._blobs.set(
                    blob,
                    (FileReference(entity.name, entity.content), context))

            private_uri = QUIT["graph-{}".format(entity.oid)]

            # At least one of the two features is enabled at this point
            # (see the early return), so the entity is always described.
            g.add((private_uri, is_a, PROV['Entity']))
            g.add((private_uri, PROV['specializationOf'],
                   context.identifier))
            g.add((private_uri, PROV['wasGeneratedBy'], commit_uri))

            q_usage = BNode()
            g.add((private_uri, PROV['qualifiedGeneration'], q_usage))
            g.add((q_usage, is_a, PROV['Generation']))
            g.add((q_usage, PROV['activity'], commit_uri))

            prev = next(entity.history(), None)
            if prev:
                # NOTE(review): ``index`` is the variable left over from the
                # diff loop above; it is unbound when that loop did not run
                # (Provenance disabled or empty delta), and this URI pattern
                # differs from ``private_uri``. Confirm the intended URI.
                prev_uri = QUIT["graph-{}-{}".format(prev.oid, index)]
                g.add((private_uri, PROV['wasDerivedFrom'], prev_uri))
                g.add((commit_uri, PROV['used'], prev_uri))

                q_derivation = BNode()
                g.add((private_uri, PROV['qualifiedDerivation'],
                       q_derivation))
                g.add((q_derivation, is_a, PROV['Derivation']))
                g.add((q_derivation, PROV['entity'], prev_uri))
                g.add((q_derivation, PROV['hadActivity'], commit_uri))

            if with_persistence:
                # Copy all triples of the file into the blob-specific graph.
                g.addN((s, p, o, private_uri)
                       for s, p, o in context.triples((None, None, None)))
Beispiel #21
0
    def create_vertex(self, vertex, **obj):
        """Stage a vertex that relates ``origin`` and ``target``.

        :param vertex: a string or a :py:class:`Vertex` subclass reference
        :param ``**obj``: the field values; ``origin`` and ``target`` are
            required (KeyError otherwise), ``uuid`` is generated when absent
        :returns: an instance of the given vertex

        """
        vertex = resolve_vertex_name(vertex)

        # generate_uuid() is evaluated even when the caller supplied a uuid.
        vertex_uuid = obj.pop('uuid', generate_uuid())
        obj['uuid'] = vertex_uuid

        # The hash covers the full payload, origin and target included. The
        # serialized payload itself is not written by this method.
        serialized = self.serialize(obj)
        object_hash = bytes(pygit2.hash(serialized))

        # Keep the complete field set for the returned instance; origin and
        # target are removed so that no index entry is written for them.
        original_obj = dict(obj)
        origin = obj.pop('origin')
        target = obj.pop('target')

        # Neither collection is read again within this method.
        predicate_ids = []
        indexes = {}
        for field, value in obj.items():
            if vertex_has_index(vertex, field):
                indexes[field] = value
            field_path = os.path.join(vertex, 'indexes', field)
            predicate_ids.append(self.add_spo(field_path, object_hash, value))

        # Both directions of the uuid <-> hash lookup.
        self.add_spo(os.path.join(vertex, '_ids'), vertex_uuid, object_hash)
        self.add_spo(os.path.join(vertex, '_uuids'), object_hash, vertex_uuid)

        origin_name = resolve_edge_name(origin)
        target_name = resolve_edge_name(target)
        RelationhipModel = Vertex.definition(vertex)
        label = RelationhipModel.label
        direction = RelationhipModel.direction

        # call('Car/incoming/bought_by/Person', 'chuck-uuid', 'car-uuid'),
        # call('___vertices___/Car/bought_by/Person', 'chuck-uuid', 'car-uuid'),
        path_templates = {
            'incoming': '{to}/incoming/{label}/{from}',
            'outgoing': '{from}/outgoing/{label}/{to}',
            'indirect': '{}/indirect/{label}/{}',
        }
        # Raises KeyError for any direction other than the three above.
        template = path_templates[direction]

        if direction == 'indirect':
            # Indirect relations are written once in each direction.
            from_uuid = target.uuid
            to_uuid = origin.uuid

            forward = template.format(target_name, origin_name, label=label)
            self.add_spo(forward, from_uuid, to_uuid)

            backward = template.format(origin_name, target_name, label=label)
            self.add_spo(backward, to_uuid, from_uuid)
        else:
            # 'incoming' goes from origin to target, 'outgoing' from target
            # to origin.
            if direction == 'incoming':
                source, source_name = origin, origin_name
                dest, dest_name = target, target_name
            else:
                source, source_name = target, target_name
                dest, dest_name = origin, origin_name

            from_uuid = source.uuid
            to_uuid = dest.uuid
            names = {'label': label, 'from': source_name, 'to': dest_name}
            self.add_spo(template.format(**names), from_uuid, to_uuid)

        return RelationhipModel.from_data(vertex, **original_obj)
Beispiel #22
0
 def test_hash(self):
     """create_blob must assign the id that pygit2.hash computes for the same data."""
     content = "foobarbaz"
     expected_id = pygit2.hash(content)
     blob_id = self.repo.create_blob(content)
     assert expected_id == blob_id
Beispiel #23
0
 def sha1(self):
     """Return the hex id of this object, computing it only while the cached value is falsy."""
     if not self._sha1:
         # Not cached yet: hash the git object form and remember the hex id.
         self._sha1 = oid2hex(pygit2.hash(self.as_object()))
     return self._sha1