def run(self):
        # Standardise the records of both sources against the chosen column
        # matches and return the schema together with both record sets.
        #
        # Reads self.project_id, self.matches, self.records1, self.records2 and
        # self.remaining_columns. Each entry of self.matches is indexed with the
        # keys 'source1', 'source2' and 'custom_name'.
        # Returns the tuple (self.schema, self.records1, self.records2).
        #
        # NOTE(review): several statements in this method look truncated in this
        # copy of the file (unclosed calls, a missing call head); each one is
        # flagged where it occurs and should be checked against the original.
        schematches = SchemaMatch()

        dal = DALMongo(self.project_id)

        # Fetch the original columns of each source.
        # NOTE(review): as written these are set comprehensions, yet schema1 and
        # schema2 are subscripted by column name in the loop below, which a set
        # does not support. Presumably they were meant to be name -> column
        # mappings -- confirm against the original file.
        schema1 = { c for c in dal.get_schema(1)}
        schema2 = { c for c in dal.get_schema(2)}

        # Build a SchemaMatch object from the chosen pairs of columns.
        for match in self.matches:
            cols1 = [schema1[col_name] for col_name in match['source1']]
            cols2 = [schema2[col_name] for col_name in match['source2']]

            schematches.add_match(cols1, cols2, match['custom_name'])

        # Schemas are standardised
        # NOTE(review): both calls end on a trailing comma with no closing
        # parenthesis; the remaining argument(s) are not visible here.
        self.records1 = self._standardise_schema(self.records1, schematches, 1,
        self.records2 = self._standardise_schema(self.records2, schematches, 2,

        # Create the global schema
        # taking one record and getting the matched schema will be enough
        # (assumes self.records1 holds at least one record -- TODO confirm what
        # should happen when it is empty).
        for col_name, col_obj in self.records1[0].columns.items():
            # Keep columns whose name starts with "__new__"; when
            # self.remaining_columns is truthy every column passes this test.
            if col_name.startswith("__new__") or self.remaining_columns:
                    # NOTE(review): the head of the call that receives this
                    # Column(...) and self.project_id is missing in this view.
                    Column(col_name, [], col_obj.type, col_obj.is_new,
                           col_obj.custom_name), self.project_id)
        # NOTE(review): self.schema is not assigned anywhere in the visible part
        # of this method -- verify where it is populated.
        return self.schema, self.records1, self.records2
    def _segment_source(self, source_number):
        # Run the configured segmentation module over the columns of every
        # record of one source, collecting any newly seen output fields into a
        # per-column schema.
        #
        # source_number: passed to dal.get_schema() and used to build the
        #     'source{}_records' name at the end of the method.
        #
        # NOTE(review): this method is heavily truncated in this copy of the
        # file (unclosed calls, missing expressions, a missing loop body); the
        # affected lines are flagged below and must be restored from the
        # original before the method can run.
        dal = DALMongo(self.project_id)

        # NOTE(review): call is cut off after its first argument.
        records = dal.get_records(StandardisationAndTaggingStep().class_name,
        # module = self._load_module(records=records)

        # Initialize columns to store new segmented schema
        orig_schema = {}
        for c_obj in dal.get_schema(source_number):
            # NOTE(review): the subscript key is missing; presumably the column
            # name, since new_cols is indexed by col_name below -- confirm.
            orig_schema[] = c_obj

        # new_cols is the same dict object as orig_schema (no copy is made), so
        # changes made through one name are visible through the other.
        new_cols = orig_schema

        # Run segmentation module for each column of each record
        for record in records:
            # NOTE(review): loop header is cut off inside the format() call.
            for col_name, segmentation_module in self.config["source{}".format(
                module = self._load_module(segmentation_module)
                # NOTE(review): the right-hand side is missing its call head;
                # only the trailing "[col_name])" survives.
                record.columns[col_name] =[col_name])

                # This is to create the new segmented schema
                for field_obj in record.columns[col_name].fields:
                    new_col_fields = new_cols[col_name].fields
                    # If a new output field was found in this column then add it to the new schema
                    if field_obj.output_field is not None and \
                            field_obj.output_field not in [field.output_field for field in new_col_fields]:
                        # TODO tags could be appended as well but for now we leave it empty
                        # NOTE(review): Field(...) call is cut off after its
                        # first keyword argument; what happens to new_of
                        # afterwards is not visible.
                        new_of = Field(value="n/A",

        # Reconstruct new_cols object so that the DAL can store it
        segmented_schema = []
        # NOTE(review): the body of this loop and the head of the call whose
        # tail appears below are missing in this view.
        for col_name, col_obj in new_cols.items():

            records, 'source{}_records'.format(source_number))
    def config_json(project_id):
        # Build the configuration description for this step: a 'matches' list
        # of rows (each row selecting source 1 columns, source 2 columns and a
        # new column name) plus a 'remaining_columns' toggle.
        #
        # project_id: handed to DALMongo to look up the schemas of sources 1
        #     and 2, which supply the options of the two multi-selects.
        #
        # NOTE(review): takes no self -- presumably declared as a staticmethod
        # with the decorator outside this view; confirm.
        # NOTE(review): the closing braces of every dict literal below are
        # missing in this copy of the file.
        dal = DALMongo(project_id)

        # NOTE(review): the element expression of both comprehensions is
        # missing; what each option is built from (e.g. the column name) cannot
        # be determined from here.
        cols1 = [ for c in dal.get_schema(1)]
        cols2 = [ for c in dal.get_schema(2)]

        # Template for a single row of the 'matches' list.
        rowmodel = {
            'type': 'row',
            'cols': {
                'source1': {
                    'label': 'Select source 1 columns',
                    'type': 'multipleselect',
                    'options': cols1
                'source2': {
                    'label': 'Select source 2 columns',
                    'type': 'multipleselect',
                    'options': cols2
                'custom_name': {
                    'label': 'New column name',
                    'type': 'text'

        # 'matches' starts with no rows; 'remaining_columns' starts unchecked.
        return {
            'matches': {
                'type': 'rows',
                'rows': [],
                'label': 'Matches',
                "rowmodel": rowmodel
            'remaining_columns': {
                'label': 'Add remaining columns to the final schema',
                'type': 'toggleswitch',
                "color": 'blue',
                'checked': False