Exemple #1
0
def process_row(bel_parser: BELParser, row: Dict, line_number: int) -> None:
    """Turn one curation row into a BEL statement and parse it.

    Rows that are not ``Checked``, or that are neither ``Correct`` nor
    ``Changed``, are skipped without touching the parser. For every other row
    the citation, evidence and annotations are set on the parser's control
    parser, and the statement ``<Subject> <Predicate> <Object>`` is parsed.
    A ``BELParserWarning`` or ``pyparsing.ParseException`` raised while parsing
    is recorded through ``bel_parser.graph.add_warning`` instead of propagating.

    :param bel_parser: The parser that receives the control state and the statement
    :param row: A mapping from column name to cell value
    :param line_number: The row's position, attached to the parse call and to syntax warnings
    :raises Exception: If the row's ``PMID`` entry is empty
    """
    # Unchecked material is never used; checked material must be either
    # confirmed as correct or have been changed by the curator.
    if not row['Checked'] or (not row['Correct'] and not row['Changed']):
        return

    pmid = row['PMID']
    if not pmid:
        raise Exception('missing reference')

    control = bel_parser.control_parser

    # Citation first, then the evidence
    control.citation_db = CITATION_TYPE_PUBMED
    control.citation_db_id = pmid
    control.evidence = row['Evidence']
    # TODO set annotations if they exist

    row_annotations = {
        'Curator': row['Curator'],
        'Confidence': 'Medium',  # needs re-curation
    }

    # Optional columns, as (column name in the row, annotation key) pairs
    optional_columns = (
        ('INDRA UUID', 'INDRA_UUID'),
        ('Belief', 'INDRA_Belief'),
        ('API', 'INDRA_API'),
    )
    for column, annotation_key in optional_columns:
        if column in row:
            row_annotations[annotation_key] = row[column]

    control.annotations.update(row_annotations)

    # Assemble the BEL statement text from its three parts
    subject, target = row['Subject'], row['Object']
    statement = '{} {} {}'.format(subject, row['Predicate'], target)

    # The caller may hand over a numpy.int64, which JSON cannot serialize
    lineno = int(line_number)

    try:
        bel_parser.parseString(statement, line_number=lineno)
    except BELParserWarning as parser_warning:
        bel_parser.graph.add_warning(parser_warning)
    except pyparsing.ParseException as parse_error:
        syntax_error = BELSyntaxError(
            line_number=lineno,
            line=statement,
            position=parse_error.loc,
        )
        bel_parser.graph.add_warning(syntax_error)
Exemple #2
0
 def setUpClass(cls):
     """Create the parser shared by the tests of this class.

     The parser is built from a fresh graph, the ``namespace_to_term`` and
     ``annotation_to_term`` lookups, and a compiled pattern for ``dbSNP``.
     """
     parser_kwargs = dict(
         graph=BELGraph(),  # placeholder; gets overwritten in each test
         namespace_to_term_to_encoding=namespace_to_term,
         annotation_to_term=annotation_to_term,
         namespace_to_pattern=dict(dbSNP=re.compile('rs[0-9]*')),
     )
     cls.parser = BELParser(**parser_kwargs)
Exemple #3
0
 def setUpClass(cls):
     """Create the graph and parser shared by the tests of this class.

     The graph is stored on the class and handed to the parser together with
     the ``namespace_to_term`` and ``annotation_to_term`` lookups and a
     compiled pattern for ``dbSNP``.
     """
     cls.graph = shared_graph = BELGraph()
     patterns = {'dbSNP': re.compile('rs[0-9]*')}
     cls.parser = BELParser(
         shared_graph,
         namespace_to_term=namespace_to_term,
         annotation_to_term=annotation_to_term,
         namespace_to_pattern=patterns,
     )
Exemple #4
0
    def get_graph(
        self,
        use_cached: bool = True,
        use_tqdm: bool = False,
        tqdm_kwargs: Optional[Mapping[str, Any]] = None,
    ) -> BELGraph:
        """Get the BEL graph from all sheets in this repository.

        .. warning:: This BEL graph isn't pre-filled with namespace and annotation URLs.

        :param use_cached: If true and the cache file already exists, load the graph
         from it instead of reading the sheets again
        :param use_tqdm: If true, wrap the iteration over sheets in a progress bar and
         pass the flag on to :func:`process_df`
        :param tqdm_kwargs: Extra keyword arguments for the progress bar over the sheets
        :return: The graph assembled from all readable sheets, which is also written
         to the cache file before returning
        """
        if use_cached and os.path.exists(self._cache_json_path):
            # The cache is produced below by ``pybel.to_nodelink_file``, so it must be
            # loaded with the matching reader rather than the gzip-only one.
            return pybel.from_nodelink_file(self._cache_json_path)

        graph = BELGraph()
        if self.metadata is not None:
            self.metadata.update(graph)

        logger.info('streamlining parser')
        bel_parser = BELParser(graph)

        paths = list(self.iterate_sheets_paths())

        if use_tqdm:
            _tqdm_kwargs = dict(desc=f'Sheets in {self.directory}')
            if tqdm_kwargs:
                _tqdm_kwargs.update(tqdm_kwargs)
            # ``paths`` is already a list, so no extra copy is needed here
            paths = tqdm(paths, **_tqdm_kwargs)

        for path in paths:
            graph.path = path

            try:
                df = pd.read_excel(path)
            except LookupError as exc:
                # Skip sheets that cannot be opened and keep going with the rest
                logger.warning('Error opening %s: %s', path, exc)
                continue

            # Check columns in DataFrame exist
            if not _check_curation_template_columns(df):
                logger.warning('^ above columns in %s were missing', path)
                continue

            process_df(
                bel_parser=bel_parser,
                df=df,
                use_tqdm=use_tqdm,
                tqdm_kwargs=dict(desc=f'Reading {path}'),
            )

        if self.prior is not None:  # assign edges to sub-graphs
            prior = self.get_prior()
            assign_subgraphs(graph=graph, prior=prior)

        # Persist the result so later calls with ``use_cached=True`` can reuse it
        pybel.to_nodelink_file(
            graph,
            self._cache_json_path,
            indent=2,
            sort_keys=True,
        )

        return graph
def build_parser_service(app: Flask):
    """Add the parser app for sending and receiving BEL statements.

    Registers two routes on ``app``: one that reports the state of the shared
    graph and one that parses a single BEL statement. Both routes close over
    one graph and one parser created here, so every request uses the same
    objects.

    :param app: The Flask application to register the routes on
    """
    graph = BELGraph()
    parser = BELParser(graph, citation_clearing=False)

    # NOTE: the part of each route docstring after ``---`` is YAML (it looks like
    # a Swagger/Flasgger specification), so explanatory notes are kept in
    # comments rather than inside those docstrings.

    @app.route('/api/parser/status')
    def get_status():
        """Return the status of the parser.

        ---
        tags:
            - parser
        """
        return jsonify({
            'status': 'ok',
            'graph_number_nodes': graph.number_of_nodes(),
            'graph_number_edges': graph.number_of_edges(),
            **graph.document,
        })

    @app.route('/api/parser/parse/<statement>', methods=['GET', 'POST'])
    def parse_bel(statement):
        """Parse a URL-encoded BEL statement.

        ---
        tags:
            - parser
        parameters:
          - name: statement
            in: path
            description: A BEL statement
            required: true
            type: string
        """
        # Start from a clean control state for every request
        parser.control_parser.clear()

        # Random placeholders stand in for the evidence and citation
        parser.control_parser.evidence = str(uuid4())
        parser.control_parser.citation = dict(type=str(uuid4()), reference=str(uuid4()))

        # NOTE(review): the query arguments are merged last, so a caller can
        # override the metadata keys above them -- confirm this is intended.
        parser.control_parser.annotations.update({
            METADATA_TIME_ADDED: time.asctime(),
            METADATA_IP: request.remote_addr,
            METADATA_HOST: request.host,
            METADATA_USER: request.remote_user,
            **request.args,
        })

        try:
            res = parser.statement.parseString(statement)
        except Exception as e:
            # Deliberately broad: any failure is reported back to the client as JSON
            return jsonify({
                'status': 'bad',
                'exception': str(e),
                'input': statement,
            })
        else:
            return jsonify(**res.asDict())
    def setUp(self):
        """Create a parser over a fresh graph with a small namespace lookup.

        The lookup has two ``HGNC`` entries and one ``MESH`` entry, each keyed
        by a ``(None, name)`` tuple and mapped to an encoding string.
        Automatic streamlining is switched off.
        """
        hgnc_terms = {(None, name): 'GRP' for name in ('AKT1', 'YFG')}
        mesh_terms = {(None, 'nucleus'): 'A'}

        self.parser = BELParser(
            BELGraph(),
            namespace_to_term_to_encoding={'HGNC': hgnc_terms, 'MESH': mesh_terms},
            autostreamline=False,
        )
Exemple #7
0
    def setUp(self):
        """Create a parser over a fresh graph with a small namespace dictionary.

        The dictionary has two ``HGNC`` names and one ``MESHCS`` name, each
        mapped to an encoding string. Automatic streamlining is switched off.
        """
        gene_encodings = dict.fromkeys(('AKT1', 'YFG'), 'GRP')
        location_encodings = {'nucleus': 'A'}

        self.parser = BELParser(
            BELGraph(),
            namespace_dict={'HGNC': gene_encodings, 'MESHCS': location_encodings},
            autostreamline=False,
        )
Exemple #8
0
 def setUpClass(cls):
     """Create the graph and parser shared by the tests of this class.

     The graph is stored on the class and passed to the parser along with the
     ``namespaces`` and ``annotations`` dictionaries and a regular-expression
     string for ``dbSNP``.
     """
     cls.graph = BELGraph()
     parser_options = {
         'namespace_dict': namespaces,
         'annotation_dict': annotations,
         'namespace_regex': {'dbSNP': 'rs[0-9]*'},
     }
     cls.parser = BELParser(cls.graph, **parser_options)