Example #1
0
    def update(self):
        """
        Prepare for the update, getting graph and update_data.  Then do the update, producing triples
        :return: list(graph, graph): The add and sub graphs for performing the update
        """
        from vivopump import read_csv, get_graph
        from rdflib import Graph
        import logging
        import os.path
        import time

        logging.basicConfig(level=logging.INFO)

        if self.update_data is None:  # Test for injection
            self.update_data = read_csv(self.out_filename,
                                        delimiter=self.inter)

        # Narrow the update_def to include only columns that appear in the update_data

        new_update_columns = {}
        for name, path in self.update_def['column_defs'].items():
            if name in self.update_data[self.update_data.keys()[0]].keys():
                new_update_columns[name] = path
        self.update_def['column_defs'] = new_update_columns

        if self.original_graph is None:  # Test for injection

            # Create the original graph from VIVO

            self.original_graph = get_graph(self.update_def,
                                            self.query_parms,
                                            debug=self.verbose)

        self.update_graph = Graph()
        for s, p, o in self.original_graph:
            self.update_graph.add((s, p, o))

        if self.verbose:
            print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \
                '. Update graph has', len(self.update_graph)
            print datetime.now(), 'Updates ready for processing. ', len(
                self.update_data), 'rows.'
            if len(self.enum) == 0:
                print datetime.now(), "No enumerations"
            else:
                for key in self.enum.keys():
                    print datetime.now(), key, "modified", time.ctime(os.path.getmtime(key)), \
                        "get", len(self.enum[key]['get']), "update", \
                        len(self.enum[key]['update'])

        return self.__do_update()
Example #2
0
    def update(self, filename=None, inter='\t', intra=';'):
        """
        Prepare for the update, getting graph and update_data.  Then do the update, producing triples
        """

        from vivopump import read_csv, get_graph
        from rdflib import Graph
        import logging

        self.intra = intra
        self.inter = inter

        logging.basicConfig(level=logging.INFO)
        if filename is not None:
            self.out_filename = filename

        if self.update_data is None:  # Test for injection
            self.update_data = read_csv(self.out_filename, delimiter=inter)

        # Narrow the update_def to include only columns that appear in the update_data

        new_update_columns = {}
        for name, path in self.update_def['column_defs'].items():
            if name in self.update_data[1].keys():
                new_update_columns[name] = path
        self.update_def['column_defs'] = new_update_columns

        self.enum = load_enum(self.update_def)

        if self.original_graph is None:  # Test for injection
            self.original_graph = get_graph(
                self.update_def,
                debug=self.verbose)  # Create the original graph from VIVO

        self.update_graph = Graph()
        for s, p, o in self.original_graph:
            self.update_graph.add((s, p, o))

        if self.verbose:
            print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \
                '. Update graph has', len(self.update_graph)
            print datetime.now(), 'Updates ready for processing. ', len(
                self.update_data), 'rows.'
            if len(self.enum) == 0:
                print datetime.now(), "No enumerations"
            else:
                for key in self.enum.keys():
                    print datetime.now(), key, "get", len(self.enum[key]['get']), "update", \
                        len(self.enum[key]['update'])

        return self.do_update()
Example #3
0
    def update(self, filename=None, inter='\t', intra=';'):
        """
        Prepare for the update, getting graph and update_data.  Then do the update, producing triples
        """

        from vivopump import read_csv, get_graph
        from rdflib import Graph
        import logging

        self.intra = intra
        self.inter = inter

        logging.basicConfig(level=logging.INFO)
        if filename is not None:
            self.out_filename = filename

        if self.update_data is None:  # Test for injection
            self.update_data = read_csv(self.out_filename, delimiter=inter)

        # Narrow the update_def to include only columns that appear in the update_data

        new_update_columns = {}
        for name, path in self.update_def['column_defs'].items():
            if name in self.update_data[1].keys():
                new_update_columns[name] = path
        self.update_def['column_defs'] = new_update_columns

        self.enum = load_enum(self.update_def)

        if self.original_graph is None:  # Test for injection
            self.original_graph = get_graph(self.update_def, debug=self.verbose)  # Create the original graph from VIVO

        self.update_graph = Graph()
        for s, p, o in self.original_graph:
            self.update_graph.add((s, p, o))

        if self.verbose:
            print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \
                '. Update graph has', len(self.update_graph)
            print datetime.now(), 'Updates ready for processing. ', len(self.update_data), 'rows.'
            if len(self.enum) == 0:
                print datetime.now(), "No enumerations"
            else:
                for key in self.enum.keys():
                    print datetime.now(), key, "get", len(self.enum[key]['get']), "update", \
                        len(self.enum[key]['update'])

        return self.do_update()
Example #4
0
    def update(self):
        """
        Prepare for the update, getting graph and update_data.  Then do the update, producing triples
        :return: list(graph, graph): The add and sub graphs for performing the update
        """
        from vivopump import read_csv, get_graph
        from rdflib import Graph
        import logging
        import os.path
        import time

        logging.basicConfig(level=logging.INFO)

        if self.update_data is None:  # Test for injection
            self.update_data = read_csv(self.out_filename, delimiter=self.inter)

        # Narrow the update_def to include only columns that appear in the update_data

        new_update_columns = {}
        for name, path in self.update_def['column_defs'].items():
            if name in self.update_data[self.update_data.keys()[0]].keys():
                new_update_columns[name] = path
        self.update_def['column_defs'] = new_update_columns

        if self.original_graph is None:  # Test for injection

            # Create the original graph from VIVO

            self.original_graph = get_graph(self.update_def, self.query_parms, debug=self.verbose)

        self.update_graph = Graph()
        for s, p, o in self.original_graph:
            self.update_graph.add((s, p, o))

        if self.verbose:
            print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \
                '. Update graph has', len(self.update_graph)
            print datetime.now(), 'Updates ready for processing. ', len(self.update_data), 'rows.'
            if len(self.enum) == 0:
                print datetime.now(), "No enumerations"
            else:
                for key in self.enum.keys():
                    print datetime.now(), key, "modified", time.ctime(os.path.getmtime(key)), \
                        "get", len(self.enum[key]['get']), "update", \
                        len(self.enum[key]['update'])

        return self.__do_update()
Example #5
0
    def update(self):
        """
        Get the update data and the original graph ready, then run the update to produce triples.

        Update data and original graph are only loaded when they were not injected beforehand.
        The column definitions in self.update_def are replaced by the subset whose names occur
        in the first row of the update data (none when there are no rows).

        :return: list(graph, graph): The add and sub graphs for performing the update
        """
        from vivopump import read_csv, get_graph
        from rdflib import Graph
        import os.path
        import time

        # Load the update data from file unless a caller injected it
        if self.update_data is None:
            self.update_data = read_csv(self.out_filename, delimiter=self.inter)

        # Keep only the column definitions whose names appear in the update data
        self.update_def['column_defs'] = {
            col_name: col_path
            for col_name, col_path in self.update_def['column_defs'].items()
            if len(self.update_data) > 0 and col_name in self.update_data[self.update_data.keys()[0]].keys()
        }

        # Build the original graph from VIVO unless a caller injected it
        if self.original_graph is None:
            self.original_graph = get_graph(self.update_def, self.query_parms)

        # Start the update graph as a triple-by-triple copy of the original graph
        self.update_graph = Graph()
        for subj, pred, obj in self.original_graph:
            self.update_graph.add((subj, pred, obj))

        graphs_message = u'Graphs ready for processing. Original has {} triples.  Update graph has {} triples.'
        logger.info(graphs_message.format(len(self.original_graph), len(self.update_graph)))
        rows_message = u'Updates ready for processing. {} rows in update.'
        logger.info(rows_message.format(len(self.update_data)))

        if len(self.enum) > 0:
            enum_message = u"Enumeration {} modified {}. {} entries in get enum.  {} entries in update enum"
            for enum_name in self.enum.keys():
                # The enumeration key is handed to os.path.getmtime, i.e. it is used as a file path
                modified = time.ctime(os.path.getmtime(enum_name))
                entry = self.enum[enum_name]
                logger.info(enum_message.format(enum_name, modified, len(entry['get']), len(entry['update'])))
        else:
            logger.info(u"No enumerations")

        return self.__do_update()
Example #6
0
    def update(self):
        """
        Prepare for the update, getting graph and update_data.  Then do the update, producing triples

        The update data is read from self.out_filename unless it was injected beforehand, and the
        original graph is fetched with get_graph unless it was injected beforehand.  The column
        definitions in self.update_def are narrowed in place to the columns found in the update
        data.  When the update data has no rows, no column definitions are kept.

        :return: list(graph, graph): The add and sub graphs for performing the update
        """
        from vivopump import read_csv, get_graph
        from rdflib import Graph
        import os.path
        import time

        if self.update_data is None:  # Test for injection
            self.update_data = read_csv(self.out_filename, delimiter=self.inter)

        #   Narrow the update_def to include only columns that appear in the update_data.
        #   The column names are read once from the first row.  Empty update data has no
        #   first row to inspect, so it keeps no columns instead of raising IndexError.

        if len(self.update_data) > 0:
            first_row_key = next(iter(self.update_data))
            data_columns = self.update_data[first_row_key].keys()
        else:
            data_columns = []

        new_update_columns = {}
        for name, path in self.update_def['column_defs'].items():
            if name in data_columns:
                new_update_columns[name] = path
        self.update_def['column_defs'] = new_update_columns

        if self.original_graph is None:  # Test for injection

            # Create the original graph from VIVO

            self.original_graph = get_graph(self.update_def, self.query_parms)

        # The update graph starts as a triple-by-triple copy of the original graph

        self.update_graph = Graph()
        for s, p, o in self.original_graph:
            self.update_graph.add((s, p, o))

        logger.info(u'Graphs ready for processing. Original has {} triples.  Update graph has {} triples.'.format(
            len(self.original_graph), len(self.update_graph)))
        logger.info(u'Updates ready for processing. {} rows in update.'.format(len(self.update_data)))

        if len(self.enum) == 0:
            logger.info(u"No enumerations")
        else:
            # Each enumeration key is passed to os.path.getmtime, i.e. it is used as a file path
            for key in self.enum.keys():
                logger.info(
                    u"Enumeration {} modified {}. {} entries in get enum.  {} entries in update enum".format(
                        key, time.ctime(os.path.getmtime(key)), len(self.enum[key]['get']),
                        len(self.enum[key]['update'])))
        return self.__do_update()
Example #7
0
 def test_normal_case(self):
     update_def = read_update_def('data/grant_def.json')
     a = get_graph(update_def)
     print len(a)
     self.assertTrue(len(a) == 241611)