コード例 #1
0
    def transform_resource(self, resource):
        """Merge another resource into ``resource`` in place.

        Options are read through ``self.get``:

        - ``resource``: the resource to merge in; a string is looked up by
          name in ``resource.package``.
        - ``fieldNames``: passed as ``header`` to petl and, when truthy, used
          to drop schema fields whose name is not listed.
        - ``ignoreFields``: when truthy the two tables are combined with
          ``petl.stack`` instead of ``petl.cat``/``petl.mergesort``.
        - ``sortByField``: when truthy, used as the ``petl.mergesort`` key.

        Both ``resource.schema`` and ``resource.data`` are updated; nothing
        is returned.
        """
        other = self.get("resource")
        header = self.get("fieldNames")
        stack_only = self.get("ignoreFields")
        sort_key = self.get("sortByField")

        # Resolve a resource given by name through the enclosing package.
        if isinstance(other, str):
            other = resource.package.get_resource(other)
        other.infer()

        left = resource.to_petl()
        right = other.to_petl()

        if stack_only:
            # Only the other resource's fields beyond the current field count
            # are appended to the schema.
            surplus = other.schema.fields[len(resource.schema.fields):]
            for extra in surplus:
                resource.schema.add_field(extra)
            resource.data = petl.stack(left, right)
            return

        # Default mode: extend the schema with fields it does not have yet.
        for candidate in other.schema.fields:
            if candidate.name in resource.schema.field_names:
                continue
            resource.schema.add_field(candidate)

        # Restrict the schema to the requested names; iterate over a snapshot
        # because fields are removed while looping.
        if header:
            for existing in list(resource.schema.fields):
                if existing.name not in header:
                    resource.schema.remove_field(existing.name)

        if sort_key:
            combined = petl.mergesort(left, right, key=sort_key, header=header)
        else:
            combined = petl.cat(left, right, header=header)
        resource.data = combined
コード例 #2
0
    def transform_resource(self, source, target):
        """Combine ``source`` with the configured resource and fill ``target``.

        ``target.data`` is assigned first and ``target.schema`` is adjusted
        afterwards; nothing is returned. A resource configured as a string is
        looked up by name in ``source.package`` and the resolved object is
        stored back on the step.
        """
        if isinstance(self.__resource, str):
            self.__resource = source.package.get_resource(self.__resource)
        other = self.__resource
        other.infer(only_sample=True)

        left = source.to_petl()
        right = other.to_petl()

        if self.__ignore_fields:
            target.data = petl.stack(left, right)
            # Only the other resource's fields beyond the target's current
            # field count are appended to the schema.
            surplus = other.schema.fields[len(target.schema.fields):]
            for extra in surplus:
                target.schema.add_field(extra)
            return

        # Default mode: merge-sort when a sort key is configured, otherwise
        # concatenate; the configured field names are passed as the header.
        if self.__sort:
            combined = petl.mergesort(
                left, right, key=self.__sort, header=self.__field_names
            )
        else:
            combined = petl.cat(left, right, header=self.__field_names)
        target.data = combined

        # Extend the schema with fields it does not have yet.
        for candidate in other.schema.fields:
            if candidate.name in target.schema.field_names:
                continue
            target.schema.add_field(candidate)

        # Restrict the schema to the configured names; iterate over a snapshot
        # because fields are removed while looping.
        if self.__field_names:
            for existing in list(target.schema.fields):
                if existing.name not in self.__field_names:
                    target.schema.remove_field(existing.name)
コード例 #3
0
ファイル: test_etl.py プロジェクト: shepardjma/parsons
    def test_stack(self):
        """``stack()`` should match ``petl.stack`` applied to the original tables."""
        tbl1 = self.tbl
        tbl2 = Table([{'first': 'Mary', 'last': 'Nichols'}])
        # Different column names shouldn't matter for stack()
        tbl3 = Table([{'f': 'Lucy', 'l': 'Peterson'}])

        # Build the expectation BEFORE calling stack(): stack() rebinds
        # ``tbl1.table`` to the stacked result, so computing the expectation
        # afterwards would stack tbl2 and tbl3 on top of an already stacked
        # table and include their rows twice.
        expected_tbl = Table(petl.stack(tbl1.table, tbl2.table, tbl3.table))

        tbl1.stack(tbl2, tbl3)

        assert_matching_tables(expected_tbl, tbl1)
コード例 #4
0
ファイル: etl.py プロジェクト: kcym-3c/parsons
    def stack(self, *tables, missing=None):
        """
        Stack Parsons tables on top of one another.

        Similar to ``table.concat()``, except no attempt is made to align fields from
        different tables.

        `Args:`
            tables: Parsons Table or list
                One or more tables passed as separate arguments, or a list
                (or tuple) of tables passed as a single argument
            missing: object
                The value to use when padding missing values
        `Returns:`
            ``None``
        """

        petl_tables = []
        for item in tables:
            # ``*tables`` is always a tuple, so a list handed in as one
            # argument arrives nested one level deep; unpack it here so that
            # ``tbl.stack([a, b])`` behaves like ``tbl.stack(a, b)``.
            if isinstance(item, (list, tuple)):
                petl_tables.extend(tbl.table for tbl in item)
            else:
                petl_tables.append(item.table)

        self.table = petl.stack(self.table, *petl_tables, missing=missing)
コード例 #5
0
ファイル: stockCrawler.py プロジェクト: Why-Not-Sky/hunting
def get_historical_quotes(trade_date='20160701'):
    """Return the TSE and OTC historical quotes for ``trade_date`` as one table.

    Calls ``get_historical_quotes_tse`` and then ``get_historical_quotes_otc``
    with the same ``trade_date`` and combines both results with ``etl.stack``.
    """
    quote_tables = [
        get_historical_quotes_tse(trade_date=trade_date),
        get_historical_quotes_otc(trade_date=trade_date),
    ]
    return etl.stack(*quote_tables)