Ejemplo n.º 1
0
    def data(self):
        """Return the (optionally filtered) DataLab records and their column order.

        When ``self.filter`` is truthy, a record is kept only if every filter
        parameter passes ``did_pass_test`` against the formula stored at the
        same index in the filter's first condition. Without a filter, all
        DataLab records are returned unchanged.

        Returns:
            dict with the keys:
                records: the records that passed the filter (or all records).
                order: column names built from the serialized DataLab order;
                    a "checkbox-group" item contributes every entry of its
                    "fields", any other item contributes its "label".
                unfilteredLength: number of records in the DataLab.
                filteredLength: number of records in ``records``.
        """
        # Function-scope import, kept as in the original code
        # (NOTE(review): presumably avoids a circular import -- confirm).
        from datalab.serializers import OrderItemSerializer

        options = self.options
        all_records = self.datalab.data

        if self.filter:
            types = options["types"]
            parameters = self.filter.parameters
            # Only the first condition of the filter is consulted.
            condition = self.filter.conditions[0]

            # The generator lets all() stop at the first failing parameter
            # instead of running every test for every record.
            filtered_data = [
                item for item in all_records
                if all(
                    did_pass_test(
                        condition.formulas[parameter_index],
                        item.get(parameter),
                        types.get(parameter),
                    )
                    for parameter_index, parameter in enumerate(parameters)
                )
            ]
        else:
            filtered_data = all_records

        order = OrderItemSerializer(
            self.datalab.order,
            many=True,
            context={"steps": self.datalab.steps},
        ).data

        column_order = []
        for item in order:
            details = item["details"]
            if details["field_type"] == "checkbox-group":
                # A checkbox group spans several columns, one per field.
                column_order.extend(details["fields"])
            else:
                column_order.append(details["label"])

        return {
            "records": filtered_data,
            "order": column_order,
            "unfilteredLength": len(all_records),
            "filteredLength": len(filtered_data),
        }
Ejemplo n.º 2
0
def dump_datalab_data(**kwargs):
    """Upload the data of every DataLab marked for dumping to S3 as CSV.

    Uses the first ``Dump`` object returned by ``Dump.objects.all()``: its
    ``last_run`` is stamped and saved, then one CSV object named
    ``<container code>_<datalab name>.csv`` is written to
    ``DATALAB_DUMP_BUCKET`` for each DataLab it references.

    Args:
        **kwargs: Accepted and ignored by this function.

    Returns:
        str: A human-readable status message.
    """
    from datalab.serializers import OrderItemSerializer

    dumps = Dump.objects.all()
    if len(dumps) == 0:
        return "No DataLabs were marked for data dump"

    if not DATALAB_DUMP_BUCKET:
        return "No DataLab dump bucket has been specified"

    dump = dumps[0]
    dump.last_run = dt.utcnow()
    dump.save()

    # Use the named AWS profile when one is configured, otherwise rely on
    # boto3's default credential resolution.
    if AWS_PROFILE:
        s3 = boto3.Session(profile_name=AWS_PROFILE).resource("s3")
    else:
        s3 = boto3.resource("s3")

    for reference in dump.datalabs:
        # The DataLab is looked up again by id rather than using the
        # referenced object directly, as the original code did.
        datalab = Datalab.objects.get(id=reference.id)

        order = OrderItemSerializer(
            datalab.order,
            many=True,
            context={"steps": datalab.steps},
        ).data

        # Build the CSV column list in DataLab order. reindex() drops every
        # column that is not listed, so a "checkbox-group" item must be
        # expanded to its individual fields; listing only its label would
        # discard those columns and emit an empty label column instead.
        # NOTE(review): assumes records are keyed by the group's field names
        # rather than by the group label -- confirm against the data schema.
        columns = []
        for item in order:
            details = item.get("details") or {}
            if details.get("field_type") == "checkbox-group":
                columns.extend(details.get("fields") or [])
            else:
                columns.append(details.get("label"))

        frame = pd.DataFrame(datalab.data).reindex(columns=columns)

        csv_buffer = StringIO()
        frame.to_csv(csv_buffer, index=False)
        s3.Object(
            DATALAB_DUMP_BUCKET,
            f"{datalab.container.code}_{datalab.name}.csv",
        ).put(Body=csv_buffer.getvalue())

    return "DataLab data dumped successfully"
Ejemplo n.º 3
0
    def filter_details(self, filters):
        """
        Function used in Serializers to get filter_details
        Input
        filters - Table Filter Details (None is treated as an empty dict)

        Output
        filter_details - Contains filtered data & other information
            - dataNum: Number of rows in the unfiltered data
            - paginationTotal: Row total reported by get_filtered_data (used for pagination)
            - filters:
                - Column Label with list of { text, value } for each column
            - filteredData:
                - The actual table data
            - groups: List of {text value} for groupby dropdown (essentially another filter)
        """
        from datalab.serializers import OrderItemSerializer

        data = self.data
        if filters is None:
            filters = {}
        df = pd.DataFrame.from_dict(data)

        # Grab Column Information to help with filtering because the filter
        # algorithm depends on the column type
        columns = OrderItemSerializer(
            self.order, many=True, context={"steps": self.steps}
        ).data

        # Find the column being grouped by. The explicit None default keeps a
        # groupBy label that no longer matches any column from escaping as a
        # bare StopIteration; it is then handled like "no grouping" below.
        group_column = None
        if self.groupBy is not None:
            group_column = next(
                (
                    column for column in columns
                    if column['details']['label'] == self.groupBy
                ),
                None,
            )

        # Perform Actual Filtering
        filtered_data, pagination_total = get_filtered_data(
            data, columns, filters, self.groupBy
        )

        return {
            'dataNum': len(data),
            'paginationTotal': pagination_total,
            'filters': get_filters(df, columns),
            'filteredData': filtered_data,
            'groups': get_column_filter(df, group_column)
        }
Ejemplo n.º 4
0
    def populate_content(self, content=None):
        """Render the content once for every filtered record.

        Each record is first assigned, per rule, to the first condition whose
        formulas all pass ``did_pass_test`` (or to the rule's ``catchAll``
        when none does). The content blocks are then rendered per record:
        "condition" blocks are included only when the record was assigned to
        that condition, every other block is always included.

        Args:
            content: Content to render; falls back to ``self.content`` when
                falsy. Read as ``content["blockMap"]["document"]["nodes"]``
                (the blocks) and ``content["html"]`` (indexed by block
                position).

        Returns:
            list: One populated string per record of ``self.data["records"]``,
            in record order. Empty when there is no content to render.
        """
        from datalab.serializers import OrderItemSerializer

        content = content or self.content
        if not content:
            return []

        filtered_data = self.data["records"]
        types = self.options["types"]

        # Assign each record to the rule groups
        populated_rules = defaultdict(set)
        for item_index, item in enumerate(filtered_data):
            for rule in self.rules:
                parameters = rule.parameters

                for condition in rule.conditions:
                    # The generator lets all() stop at the first failing
                    # parameter instead of evaluating every test.
                    if all(
                        did_pass_test(
                            condition.formulas[parameter_index],
                            item.get(parameter),
                            types.get(parameter),
                        )
                        for parameter_index, parameter in enumerate(parameters)
                    ):
                        populated_rules[condition.conditionId].add(item_index)
                        break
                else:
                    # No condition of this rule matched the record.
                    populated_rules[rule.catchAll].add(item_index)

        block_map = content["blockMap"]["document"]["nodes"]
        html = content["html"]

        order = OrderItemSerializer(
            self.datalab.order,
            many=True,
            context={"steps": self.datalab.steps},
        ).data

        # Populate the content for each record
        result = []
        for item_index, item in enumerate(filtered_data):
            lines = []

            for block_index, block in enumerate(block_map):
                if block["type"] == "condition":
                    condition_id = ObjectId(block["data"]["conditionId"])
                    # .get() avoids creating empty entries in the defaultdict.
                    if item_index not in populated_rules.get(condition_id, ()):
                        continue

                lines.append(
                    parse_content_line(html[block_index], item, order))

            result.append("".join(lines))

        return result
Ejemplo n.º 5
0
    def populate_content(self, content=None, email=False):
        """Generate one HTML string per filtered record.

        Each record is first assigned, per rule, to the first condition whose
        formulas all pass ``did_pass_test`` (or to the rule's ``catchAll``
        when none does). For every record the condition blocks it was not
        assigned to are cut out of the content, the remaining condition/rule
        tags are stripped and the attributes are substituted.

        Args:
            content: HTML content to populate; falls back to ``self.content``
                when falsy.
            email: Links are parsed with ``simple_parse_link`` only when this
                is exactly ``False``; for any other value they are left
                untouched by this method.

        Returns:
            list: One populated HTML string per record of
            ``self.data["records"]``, in record order. Empty when there is no
            content to populate.
        """
        from datalab.serializers import OrderItemSerializer

        content = content or self.content
        if not content:
            return []

        filtered_data = self.data["records"]
        types = self.options["types"]

        # Assign each student record to the rule groups
        populated_rules = defaultdict(set)
        for item_index, item in enumerate(filtered_data):
            for rule in self.rules:
                parameters = rule.parameters

                for condition in rule.conditions:
                    # The generator lets all() stop at the first failing
                    # parameter instead of evaluating every test.
                    if all(
                        did_pass_test(
                            condition.formulas[parameter_index],
                            item.get(parameter),
                            types.get(parameter),
                        )
                        for parameter_index, parameter in enumerate(parameters)
                    ):
                        populated_rules[condition.conditionId].add(item_index)
                        break
                else:
                    # No condition of this rule matched the record.
                    populated_rules[rule.catchAll].add(item_index)

        order = OrderItemSerializer(
            self.datalab.order,
            many=True,
            context={"steps": self.datalab.steps},
        ).data

        condition_ids = list(set(re.findall(r"conditionid=\"(.*?)\"",
                                            content)))
        condition_tag_locations = generate_condition_tag_locations(content)
        forms = Form.objects.filter(datalab=self.datalab)

        # Generate HTML string for each student based on conditions and
        # attributes.
        # Algo:
        # 1. Delete condition blocks that do not match the student attributes
        #     - Collect the (start, stop) slices of condition tags to delete
        #     - Perform the iterative deletion
        # 2. Clean the HTML (replace <attribute>, <condition>, <rule>) to
        #    actual HTML tags
        result = []
        for item_index, item in enumerate(filtered_data):
            # 1
            delete_indexes = []
            for condition_id in condition_ids:
                # .get() avoids creating empty entries in the defaultdict.
                matched = populated_rules.get(ObjectId(condition_id), ())
                if item_index not in matched:
                    delete_indexes.extend(
                        condition_tag_locations[condition_id])
            html = delete_html_by_indexes(content, delete_indexes)

            # 2
            html = strip_tags(html, "condition")
            html = strip_tags(html, "rule")
            html = parse_attribute(html, item, order, forms)
            if email is False:
                # normal case: parse the link to a simple <a> </a>
                html = simple_parse_link(html, item, order)

            result.append(html)
        return result