Example #1
0
def build_attribute_parser():
    """Build the pyparsing grammar for one attribute definition line.

    The grammar is ``name [= default] : type # comment`` and produces the
    named results ``name``, ``default`` (only when given), ``type`` and
    ``comment``. Quoted strings are passed as ``ignore`` expressions to the
    skip-ahead steps for the default and the type.
    """
    any_quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
    colon_sep = pp.Literal(':').suppress()
    equals_sign = pp.Literal('=').suppress()
    hash_mark = pp.Literal('#').suppress()

    name_expr = pp.Word(pp.srange('[a-z]'), pp.srange('[a-z0-9_]'))

    # A type is either free text running up to the '#' marker or a <...>
    # block that keeps its angle brackets.
    plain_type = pp.Combine(
        pp.Word(pp.alphas) + pp.SkipTo("#", ignore=any_quoted))
    bracketed_type = pp.QuotedString(
        '<', endQuoteChar='>', unquoteResults=False)
    type_expr = (plain_type ^ bracketed_type).setResultsName('type')

    default_expr = equals_sign + pp.SkipTo(
        colon_sep, ignore=any_quoted).setResultsName('default')
    comment_expr = hash_mark + pp.restOfLine.setResultsName('comment')

    return (name_expr.setResultsName('name') + pp.Optional(default_expr)
            + colon_sep + type_expr + comment_expr)
Example #2
0
def depersonalisefolders(parseresults):
    """Drop the personal-toolbar marker from the top-level folder entries.

    Works in place on ``parseresults`` and returns nothing. For every value
    reported by ``top_folders_dict`` its first element is used as an index
    into ``parseresults``; the first token of that entry is re-parsed so the
    text ``PERSONAL_TOOLBAR_FOLDER="true" `` is removed when it is present.
    """
    folder_map = top_folders_dict(parseresults)

    marker = pp.Literal('PERSONAL_TOOLBAR_FOLDER="true" ')
    text_before_marker = pp.SkipTo(marker) + marker.suppress()
    text_after_marker = pp.SkipTo(pp.stringEnd)
    # Combine joins the text on either side of the (suppressed) marker back
    # into one string token.
    stripper = pp.Combine(pp.Optional(text_before_marker) + text_after_marker)

    for positions in folder_map.values():
        index = positions[0]
        cleaned = stripper.parseString(parseresults[index][0])[0]
        parseresults[index][0] = cleaned
Example #3
0
def getSklWriteResGroupDefParser():
    """Return a parser for one ``def <name> : SchedWriteRes<[...`` block.

    Named results: ``SKLWriteResGroup``, ``Resources``, ``Latency``,
    ``NumMicroOps`` and ``ResourceCycles``. After each captured value the
    remainder of that line is consumed and left out of the results.
    """
    skip_line = pp.Suppress(pp.restOfLine)
    number = pp.Word(pp.nums)
    up_to_bracket = pp.SkipTo("]")

    # def <name>: SchedWriteRes<[<resources>] ...
    header = (pp.Suppress("def ")
              + pp.Word(pp.alphanums)("SKLWriteResGroup")
              + pp.Suppress(": SchedWriteRes<[")
              + up_to_bracket("Resources")
              + skip_line)

    # The three "let" lines are expected in exactly this order.
    latency_line = (pp.Suppress("let Latency = ")
                    + number("Latency") + skip_line)
    micro_ops_line = (pp.Suppress("let NumMicroOps = ")
                      + number("NumMicroOps") + skip_line)
    cycles_line = (pp.Suppress("let ResourceCycles = [")
                   + up_to_bracket("ResourceCycles") + skip_line)

    return header + (latency_line + micro_ops_line + cycles_line)
Example #4
0
def compile_attribute(line, in_key=False):
    """
    Convert attribute definition from DataJoint format to SQL

    The line has the form ``name [= default] : type [# comment]``.

    :param line: attribution line
    :param in_key: set to True if attribute is in primary key set
    :returns: (name, sql) -- attribute name and sql code for its declaration
    :raises DataJointError: if a primary key attribute is declared nullable
    """
    # Text in either quote style is ignored while skipping ahead to ':' or
    # '#'. pp.Or expects ONE iterable of alternatives; passing the two
    # QuotedString objects as separate positional arguments left the
    # single-quoted form out, so the alternatives are joined with '^'.
    quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
    colon = pp.Literal(':').suppress()
    attribute_name = pp.Word(pp.srange('[a-z]'),
                             pp.srange('[a-z0-9_]')).setResultsName('name')

    data_type = pp.Combine(
        pp.Word(pp.alphas) +
        pp.SkipTo("#", ignore=quoted)).setResultsName('type')
    default = pp.Literal('=').suppress() + pp.SkipTo(
        colon, ignore=quoted).setResultsName('default')
    comment = pp.Literal('#').suppress() + pp.restOfLine.setResultsName(
        'comment')

    attribute_parser = (attribute_name + pp.Optional(default) + colon +
                        data_type + comment)

    # A '#' is appended so that a line without a comment still matches the
    # grammar; it is stripped from the parsed comment again right below.
    match = attribute_parser.parseString(line + '#', parseAll=True)
    match['comment'] = match['comment'].rstrip('#')
    if 'default' not in match:
        match['default'] = ''
    match = {k: v.strip() for k, v in match.items()}
    match['nullable'] = match['default'].lower() == 'null'

    literals = ['CURRENT_TIMESTAMP']  # not to be enclosed in quotes
    if match['nullable']:
        if in_key:
            raise DataJointError(
                'Primary key attributes cannot be nullable in line %s' % line)
        # nullable attributes default to null
        match['default'] = 'DEFAULT NULL'
    else:
        if match['default']:
            # Quote the default unless it is a known SQL literal or the
            # author already quoted it.
            quote = (match['default'].upper() not in literals
                     and match['default'][0] not in '"\'')
            match['default'] = ('NOT NULL DEFAULT ' +
                                ('"%s"' if quote else "%s") % match['default'])
        else:
            match['default'] = 'NOT NULL'
    # escape double quotes in comment
    match['comment'] = match['comment'].replace('"', '\\"')
    sql = ('`{name}` {type} {default}' +
           (' COMMENT "{comment}"' if match['comment'] else '')).format(
               **match)
    return match['name'], sql
Example #5
0
    def _parse_map_tables(report_str: str) -> Dict[str, str]:
        """
        Extract every titled table from an ISE map report.

        The returned dict maps each section title to the text of the table
        found under that heading.
        """

        # A section heading looks like:
        #
        # Section 12 - Control Set Information
        # ------------------------------------
        #
        # Only the text after the dash is captured (stripped) as "title".
        heading_text = pp.SkipTo(pp.lineEnd())("title").setParseAction(
            pp.tokenMap(str.strip)
        )
        heading = (
            pp.lineStart()
            + "Section"
            + ppc.integer
            + "-"
            + heading_text
            + pp.lineEnd()
        )

        # The dashed underline, followed by one or more line ends.
        underline = pp.Suppress(
            pp.lineStart() + pp.Word("-") + pp.lineEnd() * (1,)
        )

        # Table borders look like
        # +-------------------------------+
        border = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd()

        # Most tables have the layout
        # +-----------------------+
        # | Col 1 | Col 2 | Col 3 |
        # +-----------------------+
        # | D1    | D2    | D3    |
        # ...
        # +-----------------------+
        #
        # "Control Set Information" appears to put extra borders between
        # clocks inside the data area, so the body is simply everything up to
        # a border that is followed by a blank line instead of anything more
        # precise.
        closing_border = border + pp.LineEnd()
        table_body = pp.Combine(
            border + pp.SkipTo(closing_border, include=True)
        )("body")

        section = heading + underline + table_body

        # Make line endings significant
        section.setWhitespaceChars(" \t")

        tables = {}
        for found in section.searchString(report_str):
            tables[found.title] = found.body
        return tables
Example #6
0
def build_attribute_parser():
    """Return the grammar for a ``name [= default] : type # comment`` line.

    Named results produced: ``name``, ``default`` (only when a default is
    given), ``type`` and ``comment``.
    """
    def suppressed(symbol):
        # a literal that has to match but is left out of the results
        return pp.Literal(symbol).suppress()

    quoted_text = pp.QuotedString('"') ^ pp.QuotedString("'")
    separator = suppressed(":")

    name_part = pp.Word(
        pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).setResultsName("name")

    # Two spellings of a type: free text up to '#', or a <...> block whose
    # angle brackets are kept in the result.
    free_form_type = pp.Combine(
        pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted_text))
    angle_type = pp.QuotedString("<", endQuoteChar=">", unquoteResults=False)
    type_part = (free_form_type ^ angle_type).setResultsName("type")

    default_part = suppressed("=") + pp.SkipTo(
        separator, ignore=quoted_text).setResultsName("default")
    comment_part = suppressed("#") + pp.restOfLine.setResultsName("comment")

    return (name_part + pp.Optional(default_part) + separator + type_part
            + comment_part)
Example #7
0
def _preprocessing_artifact():
  """Return a suppressed parser for a ``# ... "<string>" ...`` line.

  The match is a ``#``, then whatever ``_natural()`` matches, then a
  double-quoted string, then everything up to the end of the line. None of
  it is kept in the parse results.
  """
  hash_sign = pyparsing.Literal('#')
  natural = _natural()
  to_end_of_line = pyparsing.SkipTo(pyparsing.LineEnd())
  artifact = hash_sign + natural + pyparsing.dblQuotedString + to_end_of_line
  return artifact.suppress()
Example #8
0
def _bit_field():
  """Return a parser for ``[identifier] : <text>`` bit-field declarations.

  The identifier is optional (``None`` is produced when it is missing) and
  the text after the colon runs up to the next semicolon or comma. The match
  is post-processed by the action ``_construct_bitfield`` returns for a fresh
  expression parser.
  """
  expression = expression_parser.expression_parser()
  optional_name = pyparsing.Optional(_identifier(), None)
  up_to_terminator = pyparsing.SkipTo(_SEMICOLON | _COMA)
  bit_field = optional_name + _COLON + up_to_terminator
  return bit_field.setParseAction(_construct_bitfield(expression))
Example #9
0
class PyparsingConstants(object):
    """Constants for pyparsing-based parsers.

    Every attribute is a ready-made pyparsing expression. Later constants
    are built from earlier ones, so the definition order matters.
    """

    # Numbers.
    # A run of digits post-processed by PyParseIntCast (defined elsewhere;
    # presumably converts the token to int -- confirm against its definition).
    INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(PyParseIntCast)

    # IP addresses, taken from pyparsing's bundled common expressions.
    IPV4_ADDRESS = pyparsing.pyparsing_common.ipv4_address
    IPV6_ADDRESS = pyparsing.pyparsing_common.ipv6_address
    IP_ADDRESS = (IPV4_ADDRESS | IPV6_ADDRESS)

    # Exactly three characters: one uppercase ASCII letter followed by
    # lowercase ASCII letters. Note that the character sets are read through
    # the "string" module as it is exposed on the pyparsing module itself.
    # TODO: deprecate and remove, use THREE_LETTERS instead.
    # TODO: fix Python 3 compatibility of .uppercase and .lowercase.
    # pylint: disable=no-member
    MONTH = pyparsing.Word(pyparsing.string.ascii_uppercase,
                           pyparsing.string.ascii_lowercase,
                           exact=3)

    # Define date structures.
    # A hyphen that must be present but is left out of the results.
    HYPHEN = pyparsing.Literal('-').suppress()

    # Fixed-width digit groups, each post-processed by PyParseIntCast.
    ONE_OR_TWO_DIGITS = pyparsing.Word(pyparsing.nums, min=1,
                                       max=2).setParseAction(PyParseIntCast)
    TWO_DIGITS = pyparsing.Word(pyparsing.nums,
                                exact=2).setParseAction(PyParseIntCast)
    THREE_DIGITS = pyparsing.Word(pyparsing.nums,
                                  exact=3).setParseAction(PyParseIntCast)
    FOUR_DIGITS = pyparsing.Word(pyparsing.nums,
                                 exact=4).setParseAction(PyParseIntCast)

    # Exactly three letters, any case.
    THREE_LETTERS = pyparsing.Word(pyparsing.alphas, exact=3)

    # YYYY-MM-DD with named results year, month and day_of_month.
    DATE_ELEMENTS = (FOUR_DIGITS.setResultsName('year') +
                     pyparsing.Suppress('-') +
                     TWO_DIGITS.setResultsName('month') +
                     pyparsing.Suppress('-') +
                     TWO_DIGITS.setResultsName('day_of_month'))
    # hh:mm:ss with named results hours, minutes and seconds.
    TIME_ELEMENTS = (TWO_DIGITS.setResultsName('hours') +
                     pyparsing.Suppress(':') +
                     TWO_DIGITS.setResultsName('minutes') +
                     pyparsing.Suppress(':') +
                     TWO_DIGITS.setResultsName('seconds'))
    # TIME_ELEMENTS followed by a '.' or ',' separator and an integer that is
    # stored under the results name 'microseconds'.
    TIME_MSEC_ELEMENTS = (TIME_ELEMENTS +
                          pyparsing.Word('.,', exact=1).suppress() +
                          INTEGER.setResultsName('microseconds'))

    # Date structures defined as a single group.
    DATE = pyparsing.Group(DATE_ELEMENTS)
    DATE_TIME = pyparsing.Group(DATE_ELEMENTS + TIME_ELEMENTS)
    DATE_TIME_MSEC = pyparsing.Group(DATE_ELEMENTS + TIME_MSEC_ELEMENTS)
    TIME = pyparsing.Group(TIME_ELEMENTS)

    # Unlike TIME_MSEC_ELEMENTS this accepts only '.' as the separator, and
    # the trailing integer sits outside the TIME group and carries no
    # results name.
    TIME_MSEC = TIME + pyparsing.Suppress('.') + INTEGER
    # TODO: replace by
    # TIME_MSEC = pyparsing.Group(TIME_MSEC_ELEMENTS)

    # A '#' followed by everything up to the end of the line.
    COMMENT_LINE_HASH = pyparsing.Literal('#') + pyparsing.SkipTo(
        pyparsing.LineEnd())
    # TODO: Add more commonly used structs that can be used by parsers.
    # One to five digits, post-processed by PyParseIntCast.
    PID = pyparsing.Word(pyparsing.nums, min=1,
                         max=5).setParseAction(PyParseIntCast)
Example #10
0
def getBoldUrls(lines=None, sub=0):
    """Print every bold-tag match found in the joined ``lines``.

    :param lines: iterable of strings that are concatenated before being
        scanned; ``None`` (the default) means no input.
    :param sub: unused; kept so existing callers keep working.

    For each match three lines are printed: the matched tokens, the start
    offset and the end offset. Nothing is returned.
    """
    if lines is None:
        lines = ()
    bold_open, bold_close = pyparsing.makeHTMLTags('B')
    bold_grammar = (bold_open + pyparsing.SkipTo(bold_close)
                    + bold_close.suppress())
    for tokens, start, end in bold_grammar.scanString(''.join(lines)):
        # print(x) with a single argument works on both Python 2 and 3,
        # unlike the "print x" statement form.
        print(tokens)
        print(start)
        print(end)
Example #11
0
    def generate_entity_results_from_analysis(self, analysis):
        """Create entity results for every file result of ``analysis``.

        Each ``AbstractFileResult`` in ``self.results`` that belongs to
        ``analysis`` is searched for ``class <Name> [extends <Base>]``
        declarations. Every entity found gets its inheritance and imports
        added and a unique name created, and is then stored in
        ``self._results`` under that unique name.
        """
        LOGGER.debug('generating entity results...')

        # Snapshot the matching results first: the loop below adds new
        # entries to self._results.
        file_results: List[AbstractFileResult] = [
            candidate
            for candidate in self.results.values()
            if candidate.analysis is analysis
            and isinstance(candidate, AbstractFileResult)
        ]

        for file_result in file_results:
            entity_keywords: List[str] = [GroovyParsingKeyword.CLASS.value]

            identifier = pp.Word(pp.alphanums)
            class_keyword = pp.Keyword(GroovyParsingKeyword.CLASS.value)
            declared_name = identifier.setResultsName(
                CoreParsingKeyword.ENTITY_NAME.value)
            extends_clause = pp.Optional(
                pp.Keyword(GroovyParsingKeyword.EXTENDS.value)
                + identifier.setResultsName(
                    CoreParsingKeyword.INHERITED_ENTITY_NAME.value))
            # stop right before the token that opens the class scope
            up_to_scope = pp.SkipTo(
                pp.FollowedBy(GroovyParsingKeyword.OPEN_SCOPE.value))
            match_expression = (
                class_keyword + declared_name + extends_clause + up_to_scope)

            comment_keywords: Dict[str, str] = {
                CoreParsingKeyword.LINE_COMMENT.value:
                    GroovyParsingKeyword.INLINE_COMMENT.value,
                CoreParsingKeyword.START_BLOCK_COMMENT.value:
                    GroovyParsingKeyword.START_BLOCK_COMMENT.value,
                CoreParsingKeyword.STOP_BLOCK_COMMENT.value:
                    GroovyParsingKeyword.STOP_BLOCK_COMMENT.value,
            }

            found_entities = file_result.generate_entity_results_from_scopes(
                entity_keywords, match_expression, comment_keywords)

            for entity in found_entities:
                self._add_inheritance_to_entity_result(entity)
                self._add_imports_to_entity_result(entity)
                self.create_unique_entity_name(entity)
                self._results[entity.unique_name] = entity
Example #12
0
def enumeratedItems(baseExpr=None, form='[1]', **min_max):
    """Parser for enumerated items

    Examples:
    [1] abc
    [2] def

    ==> ['abc', 'def']

    :param baseExpr: expression for the text of one item. When omitted, an
        item is everything up to the next marker or the end of the string.
        NOTE: the parse action of a supplied ``baseExpr`` is replaced.
    :param form: template of the item marker; each ``1`` in it stands for
        the item number. ``None`` is treated as ``'[1]'``.
    :param min_max: optional repetition bounds. A positive ``exact`` sets
        both bounds; otherwise ``min`` (default 0) and ``max`` (default
        unbounded) are used.
    :returns: a repetition of groups, each holding the marker token followed
        by the item (the marker is not suppressed).
    """
    if form is None:
        form = '[1]'
    if '1' in form:
        # turn the placeholder digit into a named group matching any number
        no = pp.Regex(re.escape(form).replace('1', '(?P<no>\\d+)'))
    else:
        no = pp.Regex(re.escape(form))

    # Read the requested count from the keyword arguments; the bare name
    # "exact" is not defined in this scope.
    exact = min_max.get('exact')
    if exact is not None and exact > 0:
        max_ = min_ = exact
    else:
        min_ = min_max.get('min', 0)
        max_ = min_max.get('max', None)

    if baseExpr is None:
        return (pp.Group(no + pp.SkipTo(pp.StringEnd() | no).setParseAction(
            _strip()))) * (min_, max_)
    else:
        return (pp.Group(no + baseExpr.setParseAction(_strip()))) * (min_,
                                                                     max_)
Example #13
0
def parse_first_case_line(first_case_line):
    """Parse the first line of a case into a flat dict.

    ``first_case_line[0]`` is stored as ``case_order``. ``first_case_line[1]``
    is parsed for the ``name`` and, when present, ``gender``, ``dob`` and
    ``age``; the line must end its recognised part with a number. Every
    parsed value is whitespace-stripped before it is stored.
    """
    def hidden(text):
        # literal that has to match but is dropped from the results
        return p.Suppress(p.Literal(text))

    data = {"case_order": first_case_line[0]}

    gender = (hidden("(")
              + p.Word(p.alphas, exact=1).setResultsName("gender")
              + hidden(")"))
    dob = (hidden("DOB:") + date.setResultsName("dob")
           + hidden("Age:") + p.Word(p.nums).setResultsName("age"))

    # The name runs up to a run of at least ten spaces or up to the gender
    # marker.
    name = p.SkipTo(p.White(" ", min=10) ^ gender).setResultsName("name")

    detail = (name
              + p.Optional(gender)
              + p.Optional(dob)
              + p.Optional(hidden("LINKED CASE"))
              + p.Optional(hidden("PROVISIONAL"))
              + p.Word(p.nums))

    parsed = detail.parseString(first_case_line[1]).asDict()
    data.update((key, value.strip()) for key, value in parsed.items())
    return data
Example #14
0
def parse_cosmo_ricc2(text):
    """Parse the table that follows the ``E(exc(OCC))/eV|`` column header.

    Everything up to and including that header is skipped. Each following
    ``|``-delimited row becomes one group holding a symbol, two integers and
    four floats. Float cells that cannot be converted become ``0.``.
    Returns the pyparsing results of the whole match.
    """
    def _as_float(s, loc, toks):
        try:
            value = float(toks[0])
        except ValueError:
            value = 0.
        return value

    def _as_int(toks):
        return int(toks[0])

    float_cell = pp.Word(pp.nums + ".-").setParseAction(_as_float)
    int_cell = pp.Word(pp.nums).setParseAction(_as_int)
    sym_cell = pp.Word(pp.alphanums + "'" + '"' + "*")

    heavy_rule = pp.Suppress(pp.Word("+="))
    light_rule = pp.Suppress(pp.Word("+-"))
    bar = pp.Suppress(pp.Literal("|"))

    # Column order: symbol, multiplicity, state, E_tot, E_diff, E_exci, E_exc
    columns = [sym_cell, int_cell, int_cell,
               float_cell, float_cell, float_cell, float_cell]
    row = bar
    for cell in columns:
        row = row + cell + bar
    table_row = pp.Group(row)

    header = pp.Suppress(pp.SkipTo("E(exc(OCC))/eV|", include=True))
    table = header + heavy_rule + pp.OneOrMore(table_row + light_rule)

    return table.parseString(text)
Example #15
0
class CronPlugin(interface.SyslogPlugin):
    """Syslog plugin that turns cron task-run messages into events."""

    NAME = u'cron'

    DESCRIPTION = u'Parser for syslog cron messages.'

    REPORTER = u'CRON'

    # Building blocks of the message grammar. The command is all text up to
    # the closing parenthesis that is followed by the end of the string.
    _PYPARSING_COMPONENTS = {
        u'command': pyparsing.Combine(
            pyparsing.SkipTo(
                pyparsing.Literal(u')') + pyparsing.StringEnd())
        ).setResultsName(u'command'),
        u'username': pyparsing.Word(
            pyparsing.alphanums).setResultsName(u'username'),
    }

    # Matches: (<username>) CMD (<command>)
    _TASK_RUN_GRAMMAR = (
        pyparsing.Literal(u'(') + _PYPARSING_COMPONENTS[u'username'] +
        pyparsing.Literal(u')') +
        pyparsing.Literal(u'CMD') +
        pyparsing.Literal(u'(') + _PYPARSING_COMPONENTS[u'command'] +
        pyparsing.Literal(u')') +
        pyparsing.StringEnd())

    MESSAGE_GRAMMARS = [(u'task_run', _TASK_RUN_GRAMMAR)]

    def ParseMessage(self, parser_mediator, key, timestamp, tokens):
        """Produces an event for a syslog body that matched a grammar.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the matching grammar.
          timestamp (int): the timestamp, which contains the number of micro
              seconds since January 1, 1970, 00:00:00 UTC or 0 on error.
          tokens (dict[str, str]): tokens derived from a syslog message based
              on the defined grammar.

        Raises:
          AttributeError: If an unknown key is provided.
        """
        # TODO: change AttributeError into ValueError or equiv.
        if key != u'task_run':
            raise AttributeError(u'Unknown grammar key: {0:s}'.format(key))

        event_data = CronTaskRunEventData()
        # Copy the tokens onto the event data; missing tokens become None.
        for attribute in (u'body', u'command', u'hostname'):
            setattr(event_data, attribute, tokens.get(attribute, None))
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        for attribute in (u'pid', u'reporter', u'severity', u'username'):
            setattr(event_data, attribute, tokens.get(attribute, None))

        event = time_events.TimestampEvent(
            timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)
Example #16
0
    def _parse_utilization_tables(util_str: str) -> Dict[str, str]:
        """
        Collect the titled tables of a Vivado utilization report.

        The result maps each section title to the text of the table that
        follows that section's heading.
        """

        # A section heading is a dotted number, the title, and a dashed
        # underline; only the title is kept. For example:
        #
        # 1.1 Summary of Registers by Type
        # --------------------------------

        number = pp.Suppress(pp.lineStart() + pp.Word(pp.nums + "."))
        title_line = (
            number + pp.SkipTo(pp.lineEnd())("title") + pp.lineEnd().suppress()
        )

        # -------------------------------
        underline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd())
        heading = title_line + underline + pp.lineEnd().suppress()

        # Column headings and the end of a table are marked by borders like:
        #
        # +------+------+-------+

        border = pp.lineStart() + pp.Word("+", "-+") + pp.lineEnd()

        # A table is either just a header or a header plus data rows, so
        # after the opening border one or two further borders follow. A
        # chunk may not run across two consecutive line ends.

        chunk = pp.SkipTo(border, failOn=pp.lineEnd() * 2, include=True)
        table = pp.Combine(border + chunk * (1, 2))

        section = heading + table("table")

        # Make line endings significant
        section.setWhitespaceChars(" \t")

        tables = {}
        for found in section.searchString(util_str):
            tables[found["title"]] = found["table"]
        return tables
Example #17
0
def _struct_typedef():
    """Return a parser for ``typedef struct|union [id] {...} [*] name ...;``.

    Named results: ``type`` (the struct or union keyword), ``id`` (the
    optional tag), ``typedef_name``, and ``_struct_typedef`` for the whole
    match. Anything between the typedef name and the semicolon is skipped.
    """
    kind = _STRUCT.setResultsName("type") | _UNION.setResultsName("type")
    tag = pyparsing.Optional(_IDENTIFIER).setResultsName("id")
    body = parsers.anything_in_curly()
    pointer = pyparsing.Optional(_STAR)
    alias = _IDENTIFIER.setResultsName("typedef_name")
    tail = pyparsing.SkipTo(_SEMICOLON)

    typedef = _TYPEDEF + kind + tag + body + pointer + alias + tail + _SEMICOLON
    return typedef.setResultsName("_struct_typedef")
Example #18
0
    def _attributeParser():
        """Build the grammar for ``@name :: mode: TYPE(args)`` lines.

        Named results: ``attributename``, ``attributemode``,
        ``attributetype`` (passed through ``caps``) and, when an argument
        list is present, ``attributeargs`` with ``posargs`` and ``kwargs``.
        """
        # --- attribute parser ---
        at_line_start = p.LineStart() + p.Suppress(p.Literal('@'))
        name = p.Word(p.alphanums).setResultsName('attributename')
        double_colon = p.Suppress(p.Literal('::'))

        # TODO force case insensitivity in attributeMode keyword match
        # TODO add debug names
        # TODO add a conditional debug flag

        # NOTE(review): p.Word treats its argument as a set of allowed
        # characters, not as a fixed keyword -- confirm this is intended.
        mode_word = p.Word(MODE_KEYWORD_SINGLE) | p.Word(MODE_KEYWORD_MULTIPLE)
        mode = (mode_word.setResultsName('attributemode')
                + p.Literal(':').suppress())

        type_name = p.Word(p.alphanums).setResultsName(
            'attributetype').setParseAction(caps)

        # Positional arguments: plain words or [bracketed] text (brackets
        # kept), as long as no '=' follows.
        bracketed_posarg = p.Combine(
            p.Literal('[') + p.SkipTo(']') + p.Literal(']'))
        posarg = ((p.Word(p.alphanums) | bracketed_posarg)
                  + ~p.FollowedBy(p.Literal('='))
                  + p.Optional(p.Literal(',').suppress()))
        posargs = p.ZeroOrMore(posarg).setResultsName('posargs')

        # Keyword values may use any printable character except = , [ ] ( )
        value_chars = p.printables.translate(str.maketrans('', '', '=,[]()'))

        # Keyword arguments: keyword=value, where the value is a bare word
        # or [bracketed] text with the brackets removed.
        # TODO figure out how to make quotes work as enclosers instead of []
        bracketed_value = p.Combine(
            p.Literal('[').suppress() + p.SkipTo(']')
            + p.Literal(']').suppress())
        kwarg = p.Group(
            p.Word(p.alphanums).setResultsName('keyword')
            + p.Literal('=').suppress()
            + (p.Word(value_chars) | bracketed_value).setResultsName('value')
            + p.Optional(p.Literal(',').suppress()))
        kwargs = p.ZeroOrMore(kwarg).setResultsName('kwargs')

        args = (p.Literal('(').suppress() + posargs + kwargs
                + p.Literal(')').suppress()).setResultsName('attributeargs')

        return (at_line_start + name + double_colon + mode + type_name
                + p.Optional(args))
Example #19
0
class CronSyslogPlugin(interface.SyslogPlugin):
    """Syslog plugin that produces events for cron task-run messages."""

    NAME = 'cron'
    DATA_FORMAT = 'Cron syslog line'

    REPORTER = 'CRON'

    # Named pieces of the grammar. The command runs up to the closing
    # parenthesis that is directly followed by the end of the string.
    _PYPARSING_COMPONENTS = {
        'command': pyparsing.Combine(
            pyparsing.SkipTo(pyparsing.Literal(')') + pyparsing.StringEnd())
        ).setResultsName('command'),
        'username': pyparsing.Word(
            pyparsing.alphanums).setResultsName('username'),
    }

    # Matches: (<username>) CMD (<command>)
    _TASK_RUN_GRAMMAR = (
        pyparsing.Literal('(') + _PYPARSING_COMPONENTS['username'] +
        pyparsing.Literal(')') +
        pyparsing.Literal('CMD') +
        pyparsing.Literal('(') + _PYPARSING_COMPONENTS['command'] +
        pyparsing.Literal(')') +
        pyparsing.StringEnd())

    MESSAGE_GRAMMARS = [('task_run', _TASK_RUN_GRAMMAR)]

    def ParseMessage(self, parser_mediator, key, date_time, tokens):
        """Produces an event for a syslog body that matched a grammar.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the matching grammar.
          date_time (dfdatetime.DateTimeValues): date and time values.
          tokens (dict[str, str]): tokens derived from a syslog message based
              on the defined grammar.

        Raises:
          ValueError: If an unknown key is provided.
        """
        if key != 'task_run':
            raise ValueError('Unknown grammar key: {0:s}'.format(key))

        def token(name):
            # value of the named token, or None when it is absent
            return tokens.get(name, None)

        event_data = CronTaskRunEventData()
        event_data.body = token('body')
        event_data.command = token('command')
        event_data.hostname = token('hostname')
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        event_data.pid = token('pid')
        event_data.reporter = token('reporter')
        event_data.severity = token('severity')
        event_data.username = token('username')

        written_event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(written_event, event_data)
Example #20
0
    def _add_imports_to_result(self, result: AbstractResult, analysis):
        """Record the ``import ... from '<name>'`` dependencies of ``result``.

        The scanned tokens are stripped of comments and re-tokenised. For
        every import keyword the following tokens are parsed for the name
        after ``from '``. Names on the configured ignore list are skipped;
        all others are appended to ``result.scanned_import_dependencies``.
        Parsing hits and misses are counted in the statistics.

        :param result: the result whose tokens are scanned and whose import
            dependency list is extended.
        :param analysis: the analysis whose statistics and ignore list are
            used.
        """
        LOGGER.debug(
            f'extracting imports from base result {result.scanned_file_name}...'
        )
        list_of_words_with_newline_strings = result.scanned_tokens
        source_string_no_comments = self._filter_source_tokens_without_comments(
            list_of_words_with_newline_strings,
            JavaScriptParsingKeyword.INLINE_COMMENT.value,
            JavaScriptParsingKeyword.START_BLOCK_COMMENT.value,
            JavaScriptParsingKeyword.STOP_BLOCK_COMMENT.value)
        filtered_list_no_comments = self.preprocess_file_content_and_generate_token_list_by_mapping(
            source_string_no_comments, self._token_mappings)

        # The grammar only depends on constants, so it is built once here
        # instead of once per import statement inside the loop.
        valid_name = pp.Word(pp.alphanums +
                             CoreParsingKeyword.AT.value +
                             CoreParsingKeyword.DOT.value +
                             CoreParsingKeyword.ASTERISK.value +
                             CoreParsingKeyword.UNDERSCORE.value +
                             CoreParsingKeyword.DASH.value +
                             CoreParsingKeyword.SLASH.value)
        from_keyword = pp.Literal(JavaScriptParsingKeyword.FROM.value)
        expression_to_match = pp.SkipTo(from_keyword) + from_keyword + \
            pp.Suppress(pp.Literal(CoreParsingKeyword.SINGLE_QUOTE.value)) + \
            pp.FollowedBy(pp.OneOrMore(valid_name.setResultsName(CoreParsingKeyword.IMPORT_ENTITY_NAME.value)))

        for _, obj, following in self._gen_word_read_ahead(
                filtered_list_no_comments):
            if obj != JavaScriptParsingKeyword.IMPORT.value:
                continue

            read_ahead_string = self.create_read_ahead_string(obj, following)

            try:
                parsing_result = expression_to_match.parseString(
                    read_ahead_string)
            except Exception as some_exception:
                # Best effort: count the miss, log it and move on.
                # NOTE(review): misses are counted on result.analysis while
                # hits are counted on the analysis argument -- confirm both
                # refer to the same object.
                result.analysis.statistics.increment(
                    Statistics.Key.PARSING_MISSES)
                LOGGER.warning(
                    f'warning: could not parse result {result=}\n{some_exception}'
                )
                LOGGER.warning(
                    f'next tokens: {[obj] + following[:AbstractParsingCore.Constants.MAX_DEBUG_TOKENS_READAHEAD.value]}'
                )
                continue

            analysis.statistics.increment(Statistics.Key.PARSING_HITS)

            # ignore any dependency substring from the config ignore list
            dependency = getattr(
                parsing_result,
                CoreParsingKeyword.IMPORT_ENTITY_NAME.value)
            if self._is_dependency_in_ignore_list(dependency, analysis):
                LOGGER.debug(
                    f'ignoring dependency from {result.unique_name} to {dependency}'
                )
            else:
                result.scanned_import_dependencies.append(dependency)
                LOGGER.debug(f'adding import: {dependency}')
Example #21
0
    def __init__(self):
        '''
        Create the pyparsing building blocks used to parse definition lines.

        See `notes.md` for notes on the structure of the file
        and see `pyparsing_notes.md` for notes on pyparsing stuff

        Examples of the complete lines being parsed:

        getJsonValue json:string = JsonValue;

        setPollAnswer chat_id:int53 message_id:int53 option_ids:vector<int32> = Ok;

        getInlineQueryResults bot_user_id:int32 chat_id:int53 user_location:location query:string offset:string = InlineQueryResults;
        '''
        literal = pyparsing.Literal
        word = pyparsing.Word

        # fixed-text tokens: newline, ";", ":", "=" and the "//" that starts
        # a comment
        self.pe_newline = literal('\n')
        self.pe_semicolon_literal = literal(";")
        self.pe_colon_literal = literal(":")
        self.pe_equal_sign_literal = literal("=")
        self.pe_comment_literal = literal('//')

        # consumes everything through the end of the current line; used for
        # the program argument to skip 'N' number of lines
        self.pe_skip_line = pyparsing.SkipTo(pyparsing.lineEnd, include=True)

        # character sets: class names are alphanumeric, parameter names may
        # also contain "_", and parameter types may also contain "<" and ">"
        # (needed for stuff like `vector<String>`)
        self.pe_class_name = word(pyparsing.alphanums)
        self.pe_param_name = word(pyparsing.alphanums + "_")
        self.pe_param_type = word(pyparsing.alphanums + "<>")

        # one `name:type` pair such as `message:string`, grouped, with the
        # colon left out of the results
        named_param = self.pe_param_name(constants.RESULT_NAME_PARAM_NAME)
        typed_param = self.pe_param_type(constants.RESULT_NAME_PARAM_TYPE)
        self.pe_param_listing = pyparsing.Group(
            named_param + self.pe_colon_literal.suppress() + typed_param)

        # any number of parameter pairs, each stored under the params
        # results name (note the trailing "*" in that name)
        self.pe_zero_or_more_params = pyparsing.ZeroOrMore(
            self.pe_param_listing(f"{constants.RESULT_NAME_PARAMS}*"))

        # the name of the class/type that the line defines
        self.pe_tdlib_class_name = self.pe_class_name(
            constants.RESULT_NAME_CLASS_OR_FUNCTION_NAME)
Example #22
0
    def parse_buffer(cls, sensor_uuid, buf):
        """Build an instance from the first Snort alert found in ``buf``.

        :param sensor_uuid: passed through as the first constructor argument.
        :param buf: text searched for an alert in the format shown below.
        :returns: ``cls(sensor_uuid, *fields)`` built from the first match,
            or ``None`` when nothing in ``buf`` matches.
        """
        # Defining generic pyparsing objects.
        integer = pyp.Word(pyp.nums)
        ip_addr = pyp.Combine(integer + '.' + integer + '.' + integer + '.' + integer)
        port = pyp.Suppress(':') + integer
        # Defining pyparsing objects from expected format:
        #
        #    [**] [1:160:2] COMMUNITY SIP TCP/IP message flooding directed to SIP proxy [**]
        #    [Classification: Attempted Denial of Service] [Priority: 2]
        #    01/10-00:08:23.598520 201.233.20.33:63035 -> 192.234.122.1:22
        #    TCP TTL:53 TOS:0x10 ID:2145 IpLen:20 DgmLen:100 DF
        #    ***AP*** Seq: 0xD34C30CE  Ack: 0x6B1F7D18  Win: 0x2000  TcpLen: 32
        #
        # Note: This format is known to change over versions.
        # Works with Snort version 2.9.2 IPv6 GRE (Build 78)

        header = (
            pyp.Suppress("[**] [")
            + pyp.Combine(integer + ":" + integer + ":" + integer)
            + pyp.Suppress("]")
        )
        signature = (
            pyp.Combine(pyp.SkipTo("[**]", include=False)) + pyp.Suppress("[**]")
        )
        classif = (
            pyp.Suppress(pyp.Literal("[Classification:")) + pyp.Regex("[^]]*") + pyp.Suppress(']')
        )
        pri = pyp.Suppress("[Priority:") + integer + pyp.Suppress("]")
        date = pyp.Combine(
            # Two numbers separated by '/', optionally followed by '/' and a
            # year (the year depends on snort being started with -y). When it
            # is missing, the last two digits of the current year are used.
            integer + "/" + integer + pyp.Optional(pyp.Combine("/" + integer), default="/"+str(datetime.now().year)[2:4]) + \
            '-' + integer + ':' + integer + ':' + integer + '.' + integer
        )
        src_ip = ip_addr
        src_port = port
        arrow = pyp.Suppress("->")
        dest_ip = ip_addr
        dest_port = port
        # Raw string: "\S" is not a valid escape sequence in a normal string
        # literal and triggers a warning on current Python versions.
        proto = pyp.Regex(r"\S+")

        bnf = header + signature + pyp.Optional(classif, default='') + pri + date + \
            src_ip + pyp.Optional(src_port, default='') + arrow + dest_ip + \
            pyp.Optional(dest_port, default='') + proto

        fields = bnf.searchString(buf)
        if fields:
            if abs(datetime.utcnow() - datetime.now()).total_seconds() > 1:
                # Since snort doesn't log in UTC, a correction is needed to
                # convert the logged time to UTC. The following code calculates
                # the delta between local time and UTC and uses it to convert
                # the logged time to UTC. Additional time formatting  makes
                # sure the previous code doesn't break.
                #
                # NOTE(review): no time conversion happens here; the only
                # effect is that the fields are whitespace-stripped, and only
                # on hosts whose local time differs from UTC -- confirm the
                # intended behaviour.
                fields[0] = [f.strip() for f in fields[0]]
            return cls(sensor_uuid, *fields[0])
        else:
            return None
Example #23
0
def parse_spectre(netlist_string):
    """Parse a Spectre netlist string with a pyparsing-style grammar (``_p``).

    The grammar accepts any sequence of ``subckt ... ends <name>`` blocks,
    ``topckt ... ends <name>`` blocks, empty lines and ``//`` comments, and
    must consume the whole input (it ends with ``StringEnd``).

    Side effect: the default whitespace characters of ``_p.ParserElement``
    are set to space and tab and are not restored afterwards.

    :param netlist_string: the netlist text to parse
    :return: whatever ``netlist.parseString(netlist_string)`` returns, after
        the ``handle_*`` parse actions have been applied
    :raises: nothing is caught here, so any exception raised by
        ``parseString`` or by the ``handle_*`` parse actions propagates
    """
    # Newlines are part of the grammar, so redefine the default whitespace
    # characters to exclude them (only space and tab are skipped).
    ws = ' \t'
    _p.ParserElement.setDefaultWhitespaceChars(ws)

    # spectre netlist grammar definition
    EOL = _p.LineEnd().suppress()  # end of line
    linebreak = _p.Suppress(
        "\\" + _p.LineEnd())  # line continuation: backslash followed by newline
    identifier = _p.Word(_p.alphanums + '_!<>-+')  # generic name token
    # NOTE(review): `number` is not referenced again in this function.
    number = _p.Word(_p.nums + ".")  # a number
    # `net`, `cktname`, `instref` and `instname` are all aliases of the very
    # same `identifier` object, not copies.
    net = identifier  # a net
    nets = _p.Group(_p.OneOrMore(net('net') | linebreak))  # many nets
    cktname = identifier  # name of a subcircuit
    cktname_end = _p.Keyword("ends").suppress()
    # a comment: "//" up to the end of the line, dropped from the results
    comment = _p.Suppress("//" + _p.SkipTo(_p.LineEnd()))
    expression = _p.Word(_p.alphanums + '._*+-/()')
    # instance parameter: <identifier> = <expression>
    inst_param_key = identifier + _p.Suppress("=")
    inst_param_value = expression('expression')
    inst_parameter = _p.Group(
        inst_param_key('name') +
        inst_param_value('value')).setResultsName('key')
    # zero or more parameters, possibly spread over continued lines
    parameters = _p.Group(
        _p.ZeroOrMore(inst_parameter | linebreak)).setResultsName('parameters')
    instref = identifier
    instname = identifier
    # instance line: <name> ( <nets> ) <reference> <parameters> <newline>
    instance = _p.Group(
        instname('name') + _p.Suppress('(') + nets('nets') + _p.Suppress(')') +
        instref('reference') + parameters + EOL).setResultsName('instance')
    # body shared by subckt and topckt: instances, empty lines and comments
    subcircuit_content = _p.Group(
        _p.ZeroOrMore(instance | EOL | comment)).setResultsName('subnetlist')
    # NOTE(review): `matchPreviousExpr(cktname)` is called on the shared
    # `identifier` object, so the order of the statements below matters;
    # do not reorder without checking the pyparsing semantics.
    subcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("subckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress() +
        EOL).setResultsName('subcircuit')
    topcircuit = _p.Group(
        # matches topckt <name> <nets> <newline>
        _p.Keyword("topckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the top-level circuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress() +
        EOL).setResultsName('topcircuit')
    netlist_element = subcircuit | topcircuit | EOL | comment('comment')
    # the whole input must be consumed, hence the trailing StringEnd
    netlist = _p.ZeroOrMore(netlist_element) + _p.StringEnd()

    # Attach the handlers (defined elsewhere in this module) that
    # post-process the matched tokens.
    parameters.setParseAction(handle_parameters)
    instance.setParseAction(handle_instance)
    subcircuit.setParseAction(handle_subcircuit)
    topcircuit.setParseAction(handle_topcircuit)

    return netlist.parseString(netlist_string)
Example #24
0
    def __parse_netem_param(self, line, parse_param_name, word_pattern):
        """Store the token that follows ``parse_param_name`` in ``line``.

        The line is skipped up to and including ``parse_param_name``; the
        next word built from the characters in ``word_pattern`` is the
        value. It is saved in ``self.__parsed_param[parse_param_name]`` when
        ``dataproperty.is_not_empty_string`` accepts it.

        :param line: text to search (passed through ``_to_unicode`` first)
        :param parse_param_name: keyword to look for; also the storage key
        :param word_pattern: characters allowed in the value token
        """
        skip_to_keyword = pp.SkipTo(parse_param_name, include=True)
        grammar = skip_to_keyword + pp.Word(word_pattern)

        try:
            # The value is the last token produced by the grammar.
            value = grammar.parseString(_to_unicode(line))[-1]
            if not dataproperty.is_not_empty_string(value):
                return
            self.__parsed_param[parse_param_name] = value
        except pp.ParseException:
            # Best effort: a line without this parameter is simply ignored.
            return
Example #25
0
def table_row(start_tag, end_tag):
    """Build a parser for one table row made of ``start_tag``/``end_tag`` cells.

    A row is the module-level ``tr`` expression, zero or more cells, then the
    module-level ``tr_end`` expression. All tag expressions are suppressed,
    so the resulting group only holds the cell bodies. Each body is
    processed with ``str.strip`` and then with ``strip_html`` (defined
    elsewhere; presumably removes markup - confirm).

    :param start_tag: parser element matching a cell's opening tag
    :param end_tag: parser element matching a cell's closing tag
    :return: a ``pp.Group`` matching one complete row
    """
    cell_body = pp.SkipTo(end_tag)
    cell_body.addParseAction(pp.tokenMap(str.strip), pp.tokenMap(strip_html))

    cell = start_tag.suppress() + cell_body + end_tag.suppress()
    return pp.Group(tr.suppress() + pp.ZeroOrMore(cell) + tr_end.suppress())
Example #26
0
    def __parse_netem_delay_distro(self, line):
        """Extract the two value tokens that follow ``delay`` in ``line``.

        The grammar skips up to and including the literal ``delay`` and then
        requires two consecutive words made of digits and the characters
        ``.msu``. On a match, token 2 is stored under ``"delay"`` and token 3
        under ``"delay-distro"`` in ``self.__parsed_param``. If the line does
        not match, nothing is stored.

        :param line: text to search
        """
        delay_key = "delay"
        value_chars = pp.nums + ".msu"
        grammar = (
            pp.SkipTo(delay_key, include=True)
            + pp.Word(value_chars)
            + pp.Word(value_chars)
        )

        try:
            tokens = grammar.parseString(line)
            # NOTE(review): indices 0 and 1 are presumably the skipped text
            # and the matched keyword produced by SkipTo(include=True).
            self.__parsed_param[delay_key] = tokens[2]
            self.__parsed_param["delay-distro"] = tokens[3]
        except pp.ParseException:
            # Best effort: a line without both values is ignored.
            pass
Example #27
0
    def _parse_duplicate(self, line):
        """Return the number that precedes ``duplicates,`` in ``line``.

        :param line: text to search (passed through ``_to_unicode`` first)
        :return: the number as an ``int``, or ``0`` when the line contains
            no ``<digits> duplicates,`` sequence
        """
        count = pp.Word(pp.nums)
        marker = pp.Literal("duplicates,")
        grammar = pp.SkipTo(count + marker) + count + marker

        try:
            tokens = grammar.parseString(_to_unicode(line))
        except pp.ParseException:
            return 0
        else:
            # The last token is the marker; the one before it is the count.
            return int(tokens[-2])
Example #28
0
def getUrls(lines=()):
    """Return the ``href`` value of every ``<a ...>...</a>`` element found.

    The strings in ``lines`` are concatenated without a separator and
    scanned for anchor elements. The ``href`` attribute is looked up by
    name, so its position among the tag's attributes does not matter.
    Anchors without an ``href`` attribute are skipped.

    :param lines: iterable of strings making up the HTML text
    :return: list of ``href`` values, in document order
    """
    anchor_open, anchor_close = pyparsing.makeHTMLTags('a')
    grammar = anchor_open + pyparsing.SkipTo(anchor_close) + anchor_close.suppress()

    html = ''.join(lines)
    return [
        str(tokens['href'])
        for tokens, _start, _end in grammar.scanString(html)
        # makeHTMLTags exposes attributes as lower-cased named results.
        if 'href' in tokens
    ]
Example #29
0
    def __parse_bandwidth_rate(self, line):
        """Store the token that follows ``rate`` in ``line``, minus ``bit``.

        The line is skipped up to and including the literal ``rate``; the
        next word made of alphanumerics, ``.`` and ``:`` is the value. A
        trailing ``"bit"`` suffix is removed before the value is saved in
        ``self.__parsed_param["rate"]``.

        Only the exact suffix is removed. ``str.rstrip("bit")`` would strip
        any trailing run of the characters ``b``, ``i`` and ``t`` and turn
        ``"100Kibit"`` into ``"100K"``.

        :param line: text to search
        """
        parse_param_name = "rate"
        unit_suffix = "bit"
        pattern = pp.SkipTo(parse_param_name, include=True) + pp.Word(pp.alphanums + "." + ":")

        try:
            result = pattern.parseString(line)[-1]
        except pp.ParseException:
            # Best effort: a line without a rate is simply ignored.
            return

        if not typepy.is_not_null_string(result):
            return

        if result.endswith(unit_suffix):
            result = result[:-len(unit_suffix)]
        self.__parsed_param[parse_param_name] = result
Example #30
0
    def __parse_netem_param(self, line, parse_param_name, word_pattern, key_name=None):
        """Store the token that follows ``parse_param_name`` in ``line``.

        The line is skipped up to and including ``parse_param_name``; the
        next word built from the characters in ``word_pattern`` is the
        value. It is saved in ``self.__parsed_param`` when
        ``typepy.is_not_null_string`` accepts it.

        :param line: text to search
        :param parse_param_name: keyword to look for
        :param word_pattern: characters allowed in the value token
        :param key_name: storage key; falls back to ``parse_param_name``
            when omitted or otherwise falsy
        """
        store_key = key_name or parse_param_name
        grammar = pp.SkipTo(parse_param_name, include=True) + pp.Word(word_pattern)

        try:
            # The value is the last token produced by the grammar.
            value = grammar.parseString(line)[-1]
            if not typepy.is_not_null_string(value):
                return
            self.__parsed_param[store_key] = value
        except pp.ParseException:
            # Best effort: a line without this parameter is simply ignored.
            return