Exemple #1
0
def get_temporal_partitions(conf):
    """
    Given the database configuration, construct the sql to be executed to
    partition a table (set in the configuration) by dates, using a
    TemporalDataPartitioner

    ``since`` and ``until`` are read from ``conf['data_partition']``. Any
    ``date`` value (this includes ``datetime``, which subclasses ``date``) is
    rebuilt as a naive ``datetime`` from the first six fields of its time
    tuple (year through second); any other value is passed on unchanged.

    :param dict[str, T] conf: the dict representation of the database
    configuration
    :return: The string representation of the sql to be executed to partition
    a table by a time period.
    :raises KeyError: if a required configuration key is missing
    """
    from datetime import date, datetime

    def _as_datetime(value):
        # Normalise date-like values to a plain datetime; [:6] keeps the
        # year..second fields of the time tuple.
        if isinstance(value, date):
            return datetime(*value.timetuple()[:6])
        return value

    partition_conf = conf['data_partition']
    since = _as_datetime(partition_conf['since'])
    until = _as_datetime(partition_conf['until'])

    return str(TemporalDataPartitioner(
        conf['partition_table'],
        conf['partition_field'],
        time_window=TimePeriod(since, until),
        partition_by=conf['partition_by'],
        index_by=partition_conf['index_by'],
        # 'template_folder' may be missing or empty; both fall back to the
        # module default instead of raising.
        template_path=conf.get('template_folder') or TEMPLATE_FOLDER,
        template_name=partition_conf['template'],
    ))
    def test_to_dict_partition_by_week_strict(self):
        """
        Check ``to_dict`` for a weekly partitioner created with
        ``strict=True`` over a ten-day window starting on 2019-12-01.

        The keys of the result must match the expected keys exactly; every
        value except the one under ``'partitions'`` is compared as well.
        """
        self.start = datetime(2019, 12, 1)
        # The window end carries microsecond=999999, which is why its
        # expected string below is formatted with ``%f``.
        self.end = (self.start +
                    timedelta(days=10)).replace(microsecond=999999)
        self.time_window = TimePeriod(self.start, self.end)

        self.instance = TemporalDataPartitioner(
            self.parent_table,
            self.partition_field,
            self.time_window,
            partition_by=PartitionByEnum.w,
            index_by=self.index_by,
            template_path=self.template_path,
            template_name=self.template_name,
            strict=True)
        results = self.instance.to_dict()
        f_start = self.start.strftime("%Y-%m-%d %H:%M:%S")
        f_end = self.end.strftime("%Y-%m-%d %H:%M:%S.%f")

        expected_results = {
            'name':
            'parent_table',
            'partition_prefix':
            'parent_table_y2019_w',
            'partitions': [],
            'catch_all_partition_name':
            'parent_table_catch_all',
            'partitioned_by':
            'week',
            'partition_field':
            'partition_field',
            'field_value':
            f'cast(extract(week from NEW.{self.partition_field}) AS TEXT)',
            'self_check':
            f"NEW.{self.partition_field} >= '{f_start}' "
            f"AND NEW.{self.partition_field} <= '{f_end}' "
        }

        # Comparing key sets reports exactly which keys are missing or extra.
        self.assertEqual(set(results), set(expected_results))
        for key, expected in expected_results.items():
            if key == 'partitions':
                # The generated partitions are not compared in this test.
                continue
            self.assertEqual(results[key], expected)
    def setUp(self) -> None:
        """
        Prepare the fixture shared by the tests: the partitioner arguments,
        a ten-day time window starting on 2019-12-01 at midnight, and a
        weekly TemporalDataPartitioner built from them.
        """
        # Arguments handed to the partitioner.
        self.parent_table = 'parent_table'
        self.partition_field = 'partition_field'
        self.partition_by = PartitionByEnum.w
        self.index_by = ['a', 'b', 'c']
        self.template_path = get_default_data_path()
        self.template_name = 'sample_template.jinja2'

        # datetime(2019, 12, 1) already has hour, minute and second at zero.
        window_start = datetime(2019, 12, 1)
        window_end = window_start + timedelta(days=10)
        self.start = window_start
        self.end = window_end
        self.time_window = TimePeriod(window_start, window_end)

        partitioner_options = {
            'partition_by': self.partition_by,
            'index_by': self.index_by,
            'template_path': self.template_path,
            'template_name': self.template_name,
        }
        self.instance = TemporalDataPartitioner(
            self.parent_table,
            self.partition_field,
            self.time_window,
            **partitioner_options
        )
Exemple #4
0
def get_archive_script(since, until, parent_table='request_sets'):
    """
    Build the sql archive script for a time period.

    A TemporalArchiver is created for ``parent_table``, keyed on the
    ``created_at`` field, over the period from ``since`` to ``until``. Its
    string form (the sql for the archive table creation and the partition
    detachment / re-attachment to the archive table) is returned.
    :param datetime.datetime since: since when to archive
    :param datetime.datetime until: until when to archive
    :param str parent_table: the table to archive from
    :return: the string representation of the sql archive script to be executed
    """
    archive_period = TimePeriod(since, until)
    return str(TemporalArchiver(parent_table, 'created_at', archive_period))
Exemple #5
0
    # --indexby takes a comma-separated string and converts it to a list of
    # strings.
    parser.add_argument(
        '-i', '--indexby', help='Index the data', dest='indexby',
        type=lambda s: [str(item) for item in s.split(',')]
    )
    args = parser.parse_args()

    # A temporal partitioner needs since, until and splitby to be provided.
    if args.ptype == 'temporal':
        if not args.since or not args.until or not args.splitby:
            raise ValueError(
                'Temporal partitioner needs since and until and splitby'
            )

    # NOTE(review): the partitioner is built for every ptype, not only
    # 'temporal' - confirm this is intended.
    tp = TemporalDataPartitioner(
        args.parentname,
        args.field,
        time_window=TimePeriod(args.since, args.until),
        partition_by=args.splitby,
        index_by=args.indexby
    )

    print(tp.partitions)
    # Create the jinja2 environment - trim_blocks helps control whitespace.
    j2_env = Environment(loader=FileSystemLoader(TEMPLATE_FOLDER),
                         trim_blocks=True)
    # get template and render
    template = j2_env.get_template("data_partitioning.jinja2")
    # Expose the utcnow callable to the template under the name 'now'.
    template.globals['now'] = datetime.utcnow
    rendered_data = template.render(tp.to_dict())

    # Write the rendered sql, utf-8 encoded, to test.sql.
    # NOTE(review): str.encode already returns bytes, so the bytes(...) wrapper
    # is redundant.
    with open('test.sql', 'wb') as out_file:
        out_file.write(bytes(rendered_data.encode('utf-8')))
Exemple #6
0
                        help='Split the data by either week or month')
    # --since and --until are converted by dateutil.parser.parse.
    parser.add_argument('-s',
                        '--since',
                        help='Start date of archive period',
                        type=dateutil.parser.parse)
    parser.add_argument('-u',
                        '--until',
                        help='End date of archive period',
                        type=dateutil.parser.parse)

    # --indexby takes a comma-separated string and converts it to a list of
    # strings.
    parser.add_argument('-i',
                        '--indexby',
                        help='Index the data',
                        dest='indexby',
                        type=lambda s: [str(item) for item in s.split(',')])
    args = parser.parse_args()

    # Both ends of the archive period are required.
    if not args.since or not args.until:
        raise ValueError('Archiver needs since and until')
    # Only 'week' and 'month' are accepted as split values.
    if args.splitby not in ['week', 'month']:
        raise NotImplementedError()

    tp = TemporalArchiver(args.parentname,
                          args.field,
                          archive_period=TimePeriod(args.since, args.until),
                          partition_by=args.splitby,
                          index_by=args.indexby)

    # Write the archiver's string form to archive_test.sql.
    with open('archive_test.sql', 'w') as f:
        f.write(str(tp))