コード例 #1
0
ファイル: mr_tools.py プロジェクト: zeantsoi/reddit
    def process(thing_id, vals):
        """Collapse every row seen for one thing into at most one tuple.

        ``vals`` is walked once.  A row whose first element is ``'thing'``
        is parsed with ``format_dataspec`` into the thing record (a later
        'thing' row replaces an earlier one).  A row whose first element is
        ``'data'`` is parsed into a key/value record, and its value is kept
        only when the key is listed in ``fields``.  Other rows are ignored.

        Yields ``(thing_id, thing_type, ups, downs, deleted, spam,
        timestamp)`` followed by the collected value of each entry of
        ``fields``, in ``fields`` order.  Nothing is yielded when no 'thing'
        row was seen, when the thing is filtered out as deleted/spam, or
        when a required field is missing.

        ``counters['processed']`` or ``counters['skipped']`` is bumped to
        record which of the two outcomes happened.  ``defaults``,
        ``fields``, ``deleted``, ``spam``, ``counters`` and
        ``format_dataspec`` are resolved from outside this function.
        """
        # start from the defaults (if any) so real rows can override them
        collected = {}
        if defaults:
            collected.update(defaults)
        thing_row = None

        for row in vals:
            row_kind = row[0]

            if row_kind == 'thing':
                thing_row = format_dataspec(row, [
                    'data_type',   # e.g. 'thing'
                    'thing_type',  # e.g. 'link'
                    'ups',
                    'downs',
                    'deleted',
                    'spam',
                    'timestamp',
                ])
                continue

            if row_kind != 'data':
                continue

            entry = format_dataspec(row, [
                'data_type',   # e.g. 'data'
                'thing_type',  # e.g. 'link'
                'key',         # e.g. 'sr_id'
                'value',
            ])
            if entry.key in fields:
                collected[entry.key] = entry.value

        wanted = (
            # without the 'thing' row there is nothing to emit
            thing_row is not None
            # drop deleted / spam items unless the caller asked for them
            and (deleted or thing_row.deleted == 'f')
            and (spam or thing_row.spam == 'f')
            # every requested field must have a value
            and all(name in collected for name in fields)
        )
        if not wanted:
            counters['skipped'] += 1
            return

        counters['processed'] += 1
        thing_columns = (thing_id, thing_row.thing_type, thing_row.ups,
                         thing_row.downs, thing_row.deleted, thing_row.spam,
                         thing_row.timestamp)
        field_columns = tuple(collected[name] for name in fields)
        yield thing_columns + field_columns
コード例 #2
0
ファイル: mr_tools.py プロジェクト: zeantsoi/reddit
    def process(thing_id, vals):
        """Merge the 'thing' row and 'data' rows of one thing into a tuple.

        Each element of ``vals`` is inspected by its first item:

        * ``'thing'`` -- parsed via ``format_dataspec`` into the thing
          record; the last such row wins.
        * ``'data'``  -- parsed into a key/value record; the value is stored
          when its key appears in ``fields`` (later rows overwrite earlier
          ones and any value seeded from ``defaults``).

        Rows of any other kind are ignored.

        If a thing record was seen, it passes the ``deleted`` / ``spam``
        filters, and every name in ``fields`` has a value, a single tuple is
        yielded: ``(thing_id, thing_type, ups, downs, deleted, spam,
        timestamp)`` plus one value per entry of ``fields``.  In that case
        ``counters['processed']`` is incremented; otherwise
        ``counters['skipped']`` is incremented and nothing is yielded.

        ``defaults``, ``fields``, ``deleted``, ``spam``, ``counters`` and
        ``format_dataspec`` are free names supplied from outside this
        function.
        """
        values_by_key = dict(defaults) if defaults else {}
        parsed_thing = None

        for raw in vals:
            tag = raw[0]
            if tag == 'thing':
                parsed_thing = format_dataspec(
                    raw,
                    ['data_type',   # e.g. 'thing'
                     'thing_type',  # e.g. 'link'
                     'ups', 'downs', 'deleted', 'spam', 'timestamp'])
            elif tag == 'data':
                pair = format_dataspec(
                    raw,
                    ['data_type',   # e.g. 'data'
                     'thing_type',  # e.g. 'link'
                     'key',         # e.g. 'sr_id'
                     'value'])
                if pair.key in fields:
                    values_by_key[pair.key] = pair.value

        rejected = (
            # the 'thing' row never showed up
            parsed_thing is None
            # deleted / spam items are dropped unless explicitly requested
            or not (deleted or parsed_thing.deleted == 'f')
            or not (spam or parsed_thing.spam == 'f')
            # some required field has no value
            or not all(name in values_by_key for name in fields)
        )

        if rejected:
            counters['skipped'] += 1
        else:
            counters['processed'] += 1
            head = (thing_id, parsed_thing.thing_type, parsed_thing.ups,
                    parsed_thing.downs, parsed_thing.deleted,
                    parsed_thing.spam, parsed_thing.timestamp)
            tail = tuple(values_by_key[name] for name in fields)
            yield head + tail
コード例 #3
0
ファイル: mr_tools.py プロジェクト: B-DAP/reddit
    def process(thing_id, vals):
        """Join the rows belonging to one thing into a single output tuple.

        Rows in ``vals`` are dispatched on their first element: a
        ``"thing"`` row is parsed with ``format_dataspec`` into the thing
        record (the last one seen is kept), and a ``"data"`` row is parsed
        into a key/value record whose value is kept when the key is listed
        in ``fields``.  All other rows are ignored.

        Yields one tuple -- ``(thing_id, thing_type, ups, downs, deleted,
        spam, timestamp)`` followed by the value of each entry of
        ``fields`` -- or nothing at all when the thing row is missing, the
        thing is filtered out by the ``deleted`` / ``spam`` flags, or a
        required field has no value.

        ``fields``, ``deleted``, ``spam`` and ``format_dataspec`` are free
        names supplied from outside this function.
        """
        collected = {}
        thing_row = None

        for row in vals:
            row_kind = row[0]
            if row_kind == "thing":
                thing_row = format_dataspec(
                    row,
                    [
                        "data_type",  # e.g. 'thing'
                        "thing_type",  # e.g. 'link'
                        "ups",
                        "downs",
                        "deleted",
                        "spam",
                        "timestamp",
                    ],
                )
            elif row_kind == "data":
                entry = format_dataspec(
                    row,
                    [
                        "data_type",  # e.g. 'data'
                        "thing_type",  # e.g. 'link'
                        "key",  # e.g. 'sr_id'
                        "value",
                    ],
                )
                if entry.key in fields:
                    collected[entry.key] = entry.value

        # silently ignore the item if the 'thing' row never arrived
        if thing_row is None:
            return
        # drop deleted and spam items unless they were asked for
        if not (deleted or thing_row.deleted == "f"):
            return
        if not (spam or thing_row.spam == "f"):
            return
        # silently ignore items that lack any of the required fields
        if not all(name in collected for name in fields):
            return

        thing_columns = (
            thing_id,
            thing_row.thing_type,
            thing_row.ups,
            thing_row.downs,
            thing_row.deleted,
            thing_row.spam,
            thing_row.timestamp,
        )
        field_columns = tuple(collected[name] for name in fields)
        yield thing_columns + field_columns