def data(self):
    """Return the DataLab's records (optionally filtered) plus column order.

    Returns a dict with:
      - records: the (possibly filtered) record dicts
      - order: flat list of column labels, with checkbox groups expanded
        into their member fields
      - unfilteredLength / filteredLength: row counts before/after filtering
    """
    options = self.options

    if self.filter:
        types = options["types"]
        parameters = self.filter.parameters
        # NOTE: only the first condition of the filter is applied.
        condition = self.filter.conditions[0]
        filtered_data = [
            record
            for record in self.datalab.data
            if all([
                did_pass_test(
                    condition.formulas[index],
                    record.get(parameter),
                    types.get(parameter),
                )
                for index, parameter in enumerate(parameters)
            ])
        ]
    else:
        filtered_data = self.datalab.data

    from datalab.serializers import OrderItemSerializer

    order = OrderItemSerializer(
        self.datalab.order, many=True, context={"steps": self.datalab.steps}
    ).data

    # Checkbox groups contribute each member field as its own column;
    # every other order item contributes its label.
    column_order = []
    for entry in order:
        details = entry["details"]
        if details["field_type"] == "checkbox-group":
            column_order.extend(details["fields"])
        else:
            column_order.append(details["label"])

    return {
        "records": filtered_data,
        "order": column_order,
        "unfilteredLength": len(self.datalab.data),
        "filteredLength": len(filtered_data),
    }
def dump_datalab_data(**kwargs):
    """Dump each flagged DataLab's records to a CSV file in the S3 dump bucket.

    Reads the single ``Dump`` configuration object, stamps its ``last_run``,
    then writes one ``<container code>_<datalab name>.csv`` object per
    DataLab to ``DATALAB_DUMP_BUCKET``.

    Returns:
        A human-readable status string describing the outcome.
    """
    from datalab.serializers import OrderItemSerializer

    dump = Dump.objects.all()
    if not len(dump) > 0:
        return "No DataLabs were marked for data dump"
    if not DATALAB_DUMP_BUCKET:
        return "No DataLab dump bucket has been specified"

    dump = dump[0]
    dump.last_run = dt.utcnow()
    dump.save()

    # Use an explicit AWS profile when configured, otherwise default credentials.
    if AWS_PROFILE:
        session = boto3.Session(profile_name=AWS_PROFILE)
        s3 = session.resource("s3")
    else:
        s3 = boto3.resource("s3")

    for datalab in dump.datalabs:
        datalab = Datalab.objects.get(id=datalab.id)
        data = pd.DataFrame(datalab.data)
        csv_buffer = StringIO()

        # Re-order the columns to match the original datasource data.
        order = OrderItemSerializer(
            datalab.order, many=True, context={"steps": datalab.steps}
        )
        # Expand checkbox groups into their member fields — consistent with
        # how the Datalab `data` property builds its column order. Previously
        # only the group label was used, so member-field columns were dropped
        # (reindex produced empty columns) in the dumped CSV.
        reordered_columns = []
        for item in order.data:
            details = item.get("details", {})
            if details.get("field_type") == "checkbox-group":
                reordered_columns.extend(details.get("fields", []))
            else:
                reordered_columns.append(details.get("label"))

        data = data.reindex(columns=reordered_columns)
        data.to_csv(csv_buffer, index=False)

        s3.Object(
            DATALAB_DUMP_BUCKET, f"{datalab.container.code}_{datalab.name}.csv"
        ).put(Body=csv_buffer.getvalue())

    return "DataLab data dumped successfully"
def filter_details(self, filters):
    """
    Function used in Serializers to get filter_details

    Input
        filters - Table Filter Details

    Output
        filter_details - Contains filtered data & other information
            - dataNum: Number of rows in data
            - paginationTotal: Number of rows in data (used for pagination)
            - filters: Column Label with list of { text, value } for each column
            - filteredData: The actual table data
            - groups: List of {text value} for groupby dropdown
              (essentially another filter)
    """
    data = self.data
    if filters is None:
        filters = {}
    df = pd.DataFrame.from_dict(data)

    # Grab column information to help with filtering, because the filter
    # algorithm depends on the column type.
    from datalab.serializers import OrderItemSerializer
    columns = OrderItemSerializer(
        self.order, many=True, context={"steps": self.steps}
    ).data

    # Resolve the group-by column's metadata. Fall back to None when the
    # stored groupBy label no longer matches any column — previously a stale
    # label raised StopIteration from the bare next().
    group_column = (
        next(
            (column for column in columns
             if column['details']['label'] == self.groupBy),
            None,
        )
        if self.groupBy is not None
        else None
    )

    # Perform actual filtering.
    filtered_data, pagination_total = get_filtered_data(
        data, columns, filters, self.groupBy
    )

    return {
        'dataNum': len(data),
        'paginationTotal': pagination_total,
        'filters': get_filters(df, columns),
        'filteredData': filtered_data,
        'groups': get_column_filter(df, group_column)
    }
def populate_content(self, content=None):
    """Render the content template once per filtered record.

    Condition blocks are emitted only for records assigned to that
    condition's group; every other block is emitted unconditionally.
    Returns one populated HTML string per record.
    """
    if not content:
        if not self.content:
            return []
        content = self.content

    records = self.data["records"]
    types = self.options["types"]

    # Map each condition id (or a rule's catch-all id) to the set of record
    # indexes assigned to it. The first matching condition of a rule wins;
    # records matching none of a rule's conditions go to its catch-all.
    populated_rules = defaultdict(set)
    for record_index, record in enumerate(records):
        for rule in self.rules:
            matched_condition = None
            for condition in rule.conditions:
                passed = all([
                    did_pass_test(
                        condition.formulas[index],
                        record.get(parameter),
                        types.get(parameter),
                    )
                    for index, parameter in enumerate(rule.parameters)
                ])
                if passed:
                    matched_condition = condition
                    break
            if matched_condition is not None:
                populated_rules[matched_condition.conditionId].add(record_index)
            else:
                populated_rules[rule.catchAll].add(record_index)

    block_map = content["blockMap"]["document"]["nodes"]
    html = content["html"]

    from datalab.serializers import OrderItemSerializer
    order = OrderItemSerializer(
        self.datalab.order, many=True, context={"steps": self.datalab.steps}
    ).data

    # Populate the content for each record.
    result = []
    for record_index, record in enumerate(records):
        pieces = []
        for block_index, block in enumerate(block_map):
            if block["type"] == "condition":
                condition_id = block["data"]["conditionId"]
                if record_index not in populated_rules.get(
                        ObjectId(condition_id), {}):
                    continue
            pieces.append(parse_content_line(html[block_index], record, order))
        result.append("".join(pieces))

    return result
def populate_content(self, content=None, email=False):
    """Render the HTML content template once per filtered record.

    Args:
        content: HTML template string containing <condition>/<rule> tags and
            attribute placeholders; defaults to ``self.content``.
        email: when False (the normal case), links are additionally rewritten
            to simple ``<a>`` tags via ``simple_parse_link``.

    Returns:
        A list with one populated HTML string per record in
        ``self.data["records"]`` (empty when no content is available).
    """
    if not content and not self.content:
        return []
    elif not content:
        content = self.content

    filtered_data = self.data["records"]
    types = self.options["types"]

    # Assign each student record to the rule groups: the first matching
    # condition of a rule claims the record; otherwise it falls into the
    # rule's catch-all group.
    populated_rules = defaultdict(set)
    for item_index, item in enumerate(filtered_data):
        for rule in self.rules:
            parameters = rule.parameters
            did_match = False
            for condition in rule.conditions:
                if all([
                        did_pass_test(
                            condition.formulas[parameter_index],
                            item.get(parameter),
                            types.get(parameter),
                        ) for parameter_index, parameter in enumerate(
                            parameters)
                ]):
                    did_match = True
                    populated_rules[condition.conditionId].add(item_index)
                    break
            if not did_match:
                populated_rules[rule.catchAll].add(item_index)

    result = []

    from datalab.serializers import OrderItemSerializer
    order = OrderItemSerializer(self.datalab.order, many=True, context={
        "steps": self.datalab.steps
    }).data

    # All condition ids referenced in the template, and the character-index
    # spans of each condition tag within it (used for deletion below).
    condition_ids = list(set(re.findall(r"conditionid=\"(.*?)\"", content)))
    condition_tag_locations = generate_condition_tag_locations(content)

    forms = Form.objects.filter(datalab=self.datalab)
    """
    Generate HTML string for each student based on conditions and attributes
    Algo:
        1. Delete condition blocks that do not match the student attributes
            - Get a list of deleteIndexes of (start, stop) slices of
              condition tags to delete
            - Perform the iterative deletion
        2. Clean the HTML (replace <attribute>, <condition>, <rule>) to
           actual HTML tags
    """
    for item_index, item in enumerate(filtered_data):
        html = content

        # 1. Collect and delete the spans of every condition block this
        #    record was not assigned to.
        deleteIndexes = []
        for condition_id in condition_ids:
            if not item_index in populated_rules.get(
                    ObjectId(condition_id), {}):
                deleteIndexes += condition_tag_locations[condition_id]
        html = delete_html_by_indexes(html, deleteIndexes)

        # 2. Strip the remaining markup wrappers and fill in attributes.
        html = strip_tags(html, "condition")
        html = strip_tags(html, "rule")
        html = parse_attribute(html, item, order, forms)
        # html = parse_link(html, item, order, self.id, job_id)

        if email is False:
            # normal case: parse the link to a simple <a> </a>
            html = simple_parse_link(html, item, order)

        result.append(html)

    return result