def get_temporal_partitions(conf):
    """
    Build the sql that partitions the configured table by time period.

    The time window is read from ``conf['data_partition']`` ('since' and
    'until') and the sql is obtained by taking the string form of a
    TemporalDataPartitioner built from the configuration.

    :param dict[str, T] conf: the dict representation of the database
        configuration
    :return: The string representation of the sql to be executed to
        partition a table by a time period.
    """
    from datetime import date

    raw_bounds = (
        conf['data_partition']['since'],
        conf['data_partition']['until'],
    )

    # Date-like bounds are rebuilt as datetimes from the first six fields of
    # their time tuple (year, month, day, hour, minute, second).
    # NOTE(review): datetime is a subclass of date, so datetime values take
    # this branch too and are rebuilt the same way - confirm this is intended.
    window_start, window_end = [
        datetime(*bound.timetuple()[:6]) if isinstance(bound, date) else bound
        for bound in raw_bounds
    ]

    partitioner = TemporalDataPartitioner(
        conf['partition_table'],
        conf['partition_field'],
        time_window=TimePeriod(window_start, window_end),
        partition_by=conf['partition_by'],
        index_by=conf['data_partition']['index_by'],
        # fall back to the default folder when none is configured
        template_path=conf['template_folder'] or TEMPLATE_FOLDER,
        template_name=conf['data_partition']['template'],
    )
    return str(partitioner)
def test_to_dict_partition_by_week_strict(self):
    """
    Check to_dict() for a partitioner built with partition_by week and
    strict=True over a ten day window starting on 2019-12-01.

    Every key except 'partitions' is compared against the expected value;
    the set of keys itself must match too.
    """
    self.start = datetime(2019, 12, 1)
    self.end = (self.start + timedelta(days=10)).replace(
        microsecond=999999
    )
    self.time_window = TimePeriod(self.start, self.end)
    self.instance = TemporalDataPartitioner(
        self.parent_table,
        self.partition_field,
        self.time_window,
        partition_by=PartitionByEnum.w,
        index_by=self.index_by,
        template_path=self.template_path,
        template_name=self.template_name,
        strict=True
    )

    results = self.instance.to_dict()

    f_start = self.start.strftime("%Y-%m-%d %H:%M:%S")
    f_end = self.end.strftime("%Y-%m-%d %H:%M:%S.%f")
    expected_results = {
        'name': 'parent_table',
        'partition_prefix': 'parent_table_y2019_w',
        'partitions': [],
        'catch_all_partition_name': 'parent_table_catch_all',
        'partitioned_by': 'week',
        'partition_field': 'partition_field',
        'field_value':
            f'cast(extract(week from NEW.{self.partition_field}) AS TEXT)',
        'self_check':
            f"NEW.{self.partition_field} >= '{f_start}' "
            f"AND NEW.{self.partition_field} <= '{f_end}' "
    }

    # assertEqual reports the differing values on failure, which the former
    # assertTrue(a == b) plus print() combination only did via stdout.
    self.assertEqual(set(results), set(expected_results))
    for key, expected in expected_results.items():
        if key == 'partitions':
            # the generated partitions are not compared in this test
            continue
        self.assertEqual(
            results[key], expected, msg=f'mismatch for key {key!r}'
        )
def setUp(self) -> None:
    """
    Create the fixture shared by the tests: a TemporalDataPartitioner
    partitioned by PartitionByEnum.w over a ten day window that starts at
    midnight on 2019-12-01.
    """
    # datetime(2019, 12, 1) already has hour, minute and second set to 0
    window_start = datetime(2019, 12, 1)
    window_end = window_start + timedelta(days=10)

    self.parent_table = 'parent_table'
    self.partition_field = 'partition_field'
    self.partition_by = PartitionByEnum.w
    self.index_by = ['a', 'b', 'c']
    self.template_path = get_default_data_path()
    self.template_name = 'sample_template.jinja2'
    self.start = window_start
    self.end = window_end
    self.time_window = TimePeriod(self.start, self.end)

    partitioner_options = {
        'partition_by': self.partition_by,
        'index_by': self.index_by,
        'template_path': self.template_path,
        'template_name': self.template_name,
    }
    self.instance = TemporalDataPartitioner(
        self.parent_table,
        self.partition_field,
        self.time_window,
        **partitioner_options
    )
def get_archive_script(since, until, parent_table='request_sets'):
    """
    Return the sql archive script for the given table and period.

    A TemporalArchiver is built for ``parent_table`` on the 'created_at'
    field over the period from ``since`` to ``until``; its string form is
    the script.

    :param datetime.datetime since: since when to archive
    :param datetime.datetime until: until when to archive
    :param str parent_table: the table to archive, 'request_sets' by default
    :return: the string representation of the sql archive script to be
        executed
    """
    archive_period = TimePeriod(since, until)
    return str(TemporalArchiver(parent_table, 'created_at', archive_period))
parser.add_argument(
    '-i', '--indexby',
    help='Index the data',
    dest='indexby',
    # a comma separated value is turned into a list of strings
    type=lambda raw: list(map(str, raw.split(','))),
)
args = parser.parse_args()

# NOTE(review): the layout of this script was reconstructed from source whose
# whitespace was lost; every statement that uses `tp` is assumed to belong to
# the temporal branch - confirm against the original file.
if args.ptype == 'temporal':
    if not (args.since and args.until and args.splitby):
        raise ValueError(
            'Temporal partitioner needs since and until and splitby'
        )

    tp = TemporalDataPartitioner(
        args.parentname,
        args.field,
        time_window=TimePeriod(args.since, args.until),
        partition_by=args.splitby,
        index_by=args.indexby,
    )
    print(tp.partitions)

    # Create the jinja2 environment - trim_blocks helps control whitespace.
    j2_env = Environment(
        loader=FileSystemLoader(TEMPLATE_FOLDER),
        trim_blocks=True,
    )

    # get template and render
    template = j2_env.get_template("data_partitioning.jinja2")
    # the callable itself is exposed to the template, it is not called here
    template.globals['now'] = datetime.utcnow
    rendered_data = template.render(tp.to_dict())

    # the rendered sql is written out as utf-8 encoded bytes
    with open('test.sql', 'wb') as out_file:
        out_file.write(rendered_data.encode('utf-8'))
help='Split the data by either week or month') parser.add_argument('-s', '--since', help='Start date of archive period', type=dateutil.parser.parse) parser.add_argument('-u', '--until', help='End date of archive period', type=dateutil.parser.parse) parser.add_argument('-i', '--indexby', help='Index the data', dest='indexby', type=lambda s: [str(item) for item in s.split(',')]) args = parser.parse_args() if not args.since or not args.until: raise ValueError('Archiver needs since and until') if args.splitby not in ['week', 'month']: raise NotImplementedError() tp = TemporalArchiver(args.parentname, args.field, archive_period=TimePeriod(args.since, args.until), partition_by=args.splitby, index_by=args.indexby) with open('archive_test.sql', 'w') as f: f.write(str(tp))