Esempio n. 1
0
    def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
        """Prepare pass-through output for the incoming record layout.

        The outgoing layout is a clone of ``record_info_in``. Both the
        parent's ``output`` and ``error_output`` are initialised with it, and
        a record creator plus a record copier are stored on ``self``.

        Args:
            record_info_in: Layout of the records arriving on this input.

        Returns:
            Always ``True``.
        """
        parent = self.parent

        # Outgoing layout: a clone of the incoming one, with no extra fields.
        out_info = record_info_in.clone()

        # Tell downstream tools what the outgoing record metadata looks like.
        parent.output.init(out_info)
        parent.error_output.init(out_info)

        # Record creator built from the outgoing layout.
        self.record_creator = out_info.construct_record_creator()

        # Copier between the incoming and outgoing layouts; every input
        # column index is mapped to the same index in the output.
        self.record_copier = Sdk.RecordCopier(out_info, record_info_in)
        copier = self.record_copier
        for position in range(record_info_in.num_fields):
            copier.add(position, position)

        # Signal that all field mappings have been registered.
        copier.done_adding()

        return True
    def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
        """Validate the configured URL field and build the output layout.

        The outgoing layout is a clone of ``record_info_in`` extended with
        the fields listed in ``self.fields``. Only the parent's ``output`` is
        initialised here. A record creator and a record copier for the
        outgoing layout are stored on ``self``.

        Args:
            record_info_in: Layout of the records arriving on this input.

        Returns:
            ``False`` when ``self.parent.url`` is ``None`` or when looking the
            field up in ``record_info_in`` raises; ``True`` otherwise.
        """
        # Make sure the user provided a field to parse.
        if self.parent.url is None:
            self.parent.display_error_msg('Select a source field')
            return False

        # Look up the source field. A failed lookup is reported through the
        # parent instead of propagating out of ii_init.
        try:
            self.SourceField = record_info_in.get_field_by_name(
                self.parent.url)
        except Exception:
            self.parent.display_error_msg(
                f'Invalid source field: {self.parent.url}')
            return False

        # The outgoing layout starts as a clone of the incoming one.
        self.record_info_out = record_info_in.clone()

        # Output field config: [name, field type, size].
        self.fields = [['protocol', Sdk.FieldType.string, 20],
                       ['net_location', Sdk.FieldType.string, 100],
                       ['path', Sdk.FieldType.string, 1000],
                       ['query', Sdk.FieldType.string, 1000],
                       ['parsed_query', Sdk.FieldType.string, 1000],
                       ['fragment', Sdk.FieldType.string, 100],
                       ['hostname', Sdk.FieldType.string, 100],
                       ['port', Sdk.FieldType.int32, 10]]

        # Append each configured field to the outgoing layout, keeping what
        # add_field returns in the same order as self.fields.
        self.outRecords = [
            self.record_info_out.add_field(name, dtype, size)
            for name, dtype, size in self.fields
        ]

        # Lets the downstream tools know what the outgoing record metadata
        # will look like.
        self.parent.output.init(self.record_info_out)

        # Record creator based on the outgoing record layout.
        self.record_creator = self.record_info_out.construct_record_creator()

        # Copier between the incoming and outgoing layouts.
        self.record_copier = Sdk.RecordCopier(self.record_info_out,
                                              record_info_in)

        # Map each input column index to the same index in the output; the
        # added fields come after the cloned ones and are not mapped.
        for index in range(record_info_in.num_fields):
            self.record_copier.add(index, index)

        # Let record copier know that all field mappings have been added.
        self.record_copier.done_adding()

        return True
Esempio n. 3
0
    def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
        """Check the configured fields and set up the output layout.

        Requires both ``SourceFieldName`` and ``DestFieldName`` on the parent.
        The outgoing layout is a clone of ``record_info_in`` plus one field
        described by ``self.output_name``, ``self.output_type`` and
        ``self.output_size``. It is announced on the parent's ``output`` and
        ``error_output``.

        Args:
            record_info_in: Layout of the records arriving on this input.

        Returns:
            ``False`` (after reporting an error through the parent) when
            either field name is ``None``; ``True`` otherwise.
        """
        parent = self.parent

        # Make sure the user provided a source field.
        if parent.SourceFieldName is None:
            parent.display_error_msg('Select a source field')
            return False

        # Make sure the user provided a destination field.
        if parent.DestFieldName is None:
            parent.display_error_msg('Select a destination field')
            return False

        # Resolve both configured names against the incoming layout.
        # NOTE(review): these lookups are not guarded here; presumably an
        # unknown name raises out of ii_init - confirm.
        source_name = parent.SourceFieldName
        self.SourceField = record_info_in.get_field_by_name(source_name)
        dest_name = parent.DestFieldName
        self.DestField = record_info_in.get_field_by_name(dest_name)

        # Outgoing layout: clone of the incoming one plus the output field.
        out_info = record_info_in.clone()
        self.OutputField = out_info.add_field(
            self.output_name, self.output_type, self.output_size)

        # Tell downstream tools what the outgoing record metadata looks like.
        parent.output.init(out_info)
        parent.error_output.init(out_info)

        # Record creator built from the outgoing layout.
        self.record_creator = out_info.construct_record_creator()

        # Copier between the incoming and outgoing layouts; every input
        # column index is mapped to the same index in the output.
        self.record_copier = Sdk.RecordCopier(out_info, record_info_in)
        copier = self.record_copier
        for position in range(record_info_in.num_fields):
            copier.add(position, position)

        # Signal that all field mappings have been registered.
        copier.done_adding()

        return True
Esempio n. 4
0
    def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
        """Check the configured field and set up the three outputs.

        The main outgoing layout is a clone of ``record_info_in`` with six
        added fields (Glassdoor ID/name, review and interview search links
        and page counts). ``self.ReviewsRecord`` and ``self.InterviewsRecord``
        are read but not assigned in this method; they are expected to be
        set elsewhere before this runs.

        Args:
            record_info_in: Layout of the records arriving on this input.

        Returns:
            ``False`` (after reporting an error through the parent) when
            ``self.parent.CompanyNamesField`` is ``None``; ``True`` otherwise.
        """
        parent = self.parent

        # Make sure the user provided a field to parse.
        if parent.CompanyNamesField is None:
            parent.display_error_msg('Select a field')
            return False

        # Resolve the configured name against the incoming layout.
        configured_name = parent.CompanyNamesField
        self.CompanyNamesField = record_info_in.get_field_by_name(
            configured_name)

        # Outgoing layout starts as a clone of the incoming one.
        out_info = record_info_in.clone()

        # Field types used for the added columns.
        wide_text = Sdk.FieldType.v_wstring
        big_int = Sdk.FieldType.int64

        # Added fields, in output order. The page-count fields are added
        # without an explicit size.
        self.IdField = out_info.add_field("Glassdoor ID", wide_text, 100)
        self.NameField = out_info.add_field("Glassdoor Name", wide_text, 100)
        self.ReviewLinkField = out_info.add_field(
            "Begin Review Search Link", wide_text, 1000)
        self.ReviewPagesField = out_info.add_field("Review Pages", big_int)
        self.InterviewLinkField = out_info.add_field(
            "Begin Interview Search Link", wide_text, 1000)
        self.InterviewPagesField = out_info.add_field(
            "Interview Pages", big_int)

        # Tell downstream tools what each outgoing record layout looks like.
        parent.Output.init(out_info)
        parent.Reviews.init(self.ReviewsRecord)
        parent.Interviews.init(self.InterviewsRecord)

        # Record creator built from the main outgoing layout.
        self.OutputCreator = out_info.construct_record_creator()

        # Copier between the incoming and main outgoing layouts; every input
        # column index is mapped to the same index in the output.
        self.OutputCopier = Sdk.RecordCopier(out_info, record_info_in)
        copier = self.OutputCopier
        for position in range(record_info_in.num_fields):
            copier.add(position, position)

        # Signal that all field mappings have been registered.
        copier.done_adding()

        return True