def ii_init(self, record_info_in: sdk.RecordInfo) -> bool:
    """Cache the incoming record layout and the two configured fields.

    Returns False, leaving the instance untouched, when the parent reports
    invalid parameters. Otherwise stores the incoming RecordInfo together
    with the fields named by the parent's ``FileNameFieldName`` and
    ``NameFieldName`` settings and returns True.
    """
    parent = self.parent
    if parent.params_are_valid():
        self.IncomingRecordInfo = record_info_in
        find_field = record_info_in.get_field_by_name
        self.FileNameField = find_field(parent.FileNameFieldName)
        self.NameField = find_field(parent.NameFieldName)
        return True
    return False
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Build the output layout, record creator and copier for URL parsing.

    Returns False after showing an error when no source field is configured
    or when looking it up in the incoming layout fails. Otherwise clones the
    incoming layout, appends the parsed-URL columns, announces the layout on
    the parent's output and returns True.
    """
    parent = self.parent

    # Nothing can be parsed until a source field has been chosen.
    if parent.url is None:
        parent.display_error_msg('Select a source field')
        return False

    # Any failure while resolving the field is reported to the user.
    try:
        self.SourceField = record_info_in.get_field_by_name(parent.url)
    except Exception:
        parent.display_error_msg(f'Invalid source field: {parent.url}')
        return False

    # The output layout starts as a copy of the incoming layout.
    layout_out = record_info_in.clone()
    self.record_info_out = layout_out

    # [name, type, size] of every column appended after the incoming ones.
    self.fields = [
        ['protocol', Sdk.FieldType.string, 20],
        ['net_location', Sdk.FieldType.string, 100],
        ['path', Sdk.FieldType.string, 1000],
        ['query', Sdk.FieldType.string, 1000],
        ['parsed_query', Sdk.FieldType.string, 1000],
        ['fragment', Sdk.FieldType.string, 100],
        ['hostname', Sdk.FieldType.string, 100],
        ['port', Sdk.FieldType.int32, 10],
    ]

    # Keep the field objects returned by add_field, in declaration order.
    self.outRecords = []
    for field_name, field_type, field_size in self.fields:
        new_field = layout_out.add_field(field_name, field_type, field_size)
        self.outRecords.append(new_field)

    # Tell downstream tools what the outgoing records will look like.
    parent.output.init(layout_out)

    # Creator for outgoing records, based on the extended layout.
    self.record_creator = layout_out.construct_record_creator()

    # Copier that maps every incoming column onto the same output position.
    copier = Sdk.RecordCopier(layout_out, record_info_in)
    self.record_copier = copier
    for position in range(record_info_in.num_fields):
        copier.add(position, position)
    copier.done_adding()

    return True
def test_record_copier_proxy():
    """A proxy copy must equal the source record without being the same object."""
    engine = AlteryxEngine()
    source_info = RecordInfo(engine)
    target_info = RecordInfo(engine)

    # Both layouts receive the same two columns.
    column_specs = (("a", FieldType.int32), ("b", FieldType.v_wstring))
    for column_name, column_type in column_specs:
        source_info.add_field(field_name=column_name, field_type=column_type)
        target_info.add_field(field_name=column_name, field_type=column_type)

    proxy = RecordCopierProxy(source_info, target_info, {"a": "a", "b": "b"})

    record = RecordRef(source_info)
    record.data["a"] = 123
    record.data["b"] = "Hello world"

    # copy() hands back an object exposing finalize_record().
    copied = proxy.copy(record)

    assert record == copied.finalize_record()
    assert record is not copied.finalize_record()
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Resolve the source/destination fields and prepare the outputs.

    Returns False after showing an error when either field name is missing
    from the parent's configuration. Otherwise clones the incoming layout,
    appends one output field described by ``self.output_name``,
    ``self.output_type`` and ``self.output_size``, announces the layout on
    both the regular and the error output, and returns True.
    """
    parent = self.parent

    # Both field names must have been configured by the user.
    if parent.SourceFieldName is None:
        parent.display_error_msg('Select a source field')
        return False
    if parent.DestFieldName is None:
        parent.display_error_msg('Select a destination field')
        return False

    # Resolve the configured names against the incoming layout.
    find_field = record_info_in.get_field_by_name
    self.SourceField = find_field(parent.SourceFieldName)
    self.DestField = find_field(parent.DestFieldName)

    # Outgoing layout: a copy of the incoming one plus the result column.
    layout_out = record_info_in.clone()
    self.OutputField = layout_out.add_field(
        self.output_name, self.output_type, self.output_size
    )

    # Both outputs share the same record layout.
    parent.output.init(layout_out)
    parent.error_output.init(layout_out)

    self.record_creator = layout_out.construct_record_creator()

    # Copier that maps every incoming column onto the same output position.
    copier = Sdk.RecordCopier(layout_out, record_info_in)
    self.record_copier = copier
    for position in range(record_info_in.num_fields):
        copier.add(position, position)
    copier.done_adding()

    return True
def record_collection():
    """Build a two-column float layout and two records populated for it.

    Returns a ``(record_info, records)`` pair where ``records`` is a list
    holding the two RecordRef objects in creation order.
    """
    record_info = RecordInfo(AlteryxEngine())
    for column_name in ("a", "b"):
        record_info.add_field(column_name, FieldType.float)

    records = []
    for a_value, b_value in ((123, 666.666), (456, 999.999)):
        record = RecordRef(record_info)
        record.data["a"] = a_value
        record.data["b"] = b_value
        records.append(record)

    return record_info, records
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Set up a pass-through layout for both outputs.

    The outgoing layout is a clone of the incoming one with no extra
    columns. It is announced on the parent's regular and error outputs, and
    a record creator plus an index-for-index record copier are stored on the
    instance. Always returns True.
    """
    parent = self.parent
    layout_out = record_info_in.clone()

    # Both outputs share the same record layout.
    parent.output.init(layout_out)
    parent.error_output.init(layout_out)

    self.record_creator = layout_out.construct_record_creator()

    # Copier that maps every incoming column onto the same output position.
    copier = Sdk.RecordCopier(layout_out, record_info_in)
    self.record_copier = copier
    for position in range(record_info_in.num_fields):
        copier.add(position, position)
    copier.done_adding()

    return True
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Resolve the company-name field and initialise the three outputs.

    Returns False after showing an error when the parent has no company
    names field configured. Otherwise clones the incoming layout, appends
    the six Glassdoor summary columns, initialises ``Output`` with that
    layout and ``Reviews`` / ``Interviews`` with ``self.ReviewsRecord`` /
    ``self.InterviewsRecord`` (both read from the instance, not built here),
    and returns True.
    """
    parent = self.parent

    if parent.CompanyNamesField is None:
        parent.display_error_msg('Select a field')
        return False

    self.CompanyNamesField = record_info_in.get_field_by_name(
        parent.CompanyNamesField
    )

    # Outgoing layout: incoming columns followed by the summary columns.
    layout_out = record_info_in.clone()
    text_type = Sdk.FieldType.v_wstring
    count_type = Sdk.FieldType.int64

    self.IdField = layout_out.add_field("Glassdoor ID", text_type, 100)
    self.NameField = layout_out.add_field("Glassdoor Name", text_type, 100)
    self.ReviewLinkField = layout_out.add_field(
        "Begin Review Search Link", text_type, 1000
    )
    self.ReviewPagesField = layout_out.add_field("Review Pages", count_type)
    self.InterviewLinkField = layout_out.add_field(
        "Begin Interview Search Link", text_type, 1000
    )
    self.InterviewPagesField = layout_out.add_field(
        "Interview Pages", count_type
    )

    # Announce the record layout of every outgoing anchor.
    parent.Output.init(layout_out)
    parent.Reviews.init(self.ReviewsRecord)
    parent.Interviews.init(self.InterviewsRecord)

    self.OutputCreator = layout_out.construct_record_creator()

    # Copier that maps every incoming column onto the same output position.
    copier = Sdk.RecordCopier(layout_out, record_info_in)
    self.OutputCopier = copier
    for position in range(record_info_in.num_fields):
        copier.add(position, position)
    copier.done_adding()

    return True
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Require an incoming 'Event' field and initialise the output.

    The lookup is made with ``throw_error=False`` and its result is stored
    on ``self.EventField``. When that result is None an error is shown and
    False is returned; otherwise the parent's output is initialised with
    ``self.Info`` and True is returned.
    """
    event_field = record_info_in.get_field_by_name('Event', throw_error=False)
    self.EventField = event_field

    if event_field is not None:
        self.parent.Output.init(self.Info)
        return True

    self.parent.display_error_msg("Incoming data source must contain an 'Event' text field that pushes 'Start' and 'End' events")
    return False
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Resolve the upload-file field and prepare the output layout.

    Stores the incoming field named by the parent's ``uploadFileField``,
    the layout produced by ``_generate_output_record_info()`` with a record
    creator for it, and the batch-read URL formed by appending the fixed
    path to the parent's ``endpoint``. Always returns True.
    """
    parent = self.parent

    self.uploadFileField = record_info_in.get_field_by_name(
        parent.uploadFileField
    )

    self.output_info = self._generate_output_record_info()
    self.output_creator = self.output_info.construct_record_creator()

    # Plain concatenation: the endpoint is used exactly as configured.
    read_path = "vision/v2.0/read/core/asyncBatchAnalyze"
    self.batch_read_url = parent.endpoint + read_path

    parent.output.init(self.output_info)
    return True
def __init__(
    self,
    input_record_info: RecordInfo,
    output_record_info: RecordInfo,
    field_name_map: Mapping[str, str],
):
    """Construct a record copier proxy object.

    :param input_record_info: layout of the records that are copied from
    :param output_record_info: layout of the records that are copied into
    :param field_name_map: maps each input field name to the name of the
        output field that receives its value
    """
    self._input_record_info = input_record_info
    self._output_record_info = output_record_info

    # RecordCopier takes the destination layout first and the source layout
    # second, matching add(destination_index, source_index) below.
    self._record_copier = RecordCopier(output_record_info, input_record_info)

    for input_name, output_name in field_name_map.items():
        input_idx = input_record_info.get_field_num(input_name)
        storage_idx = output_record_info.get_field_num(output_name)
        self._record_copier.add(storage_idx, input_idx)

    # No further mappings may be registered after this call.
    self._record_copier.done_adding()
def _list_to_recordinfo(self, rec_info: Sdk.RecordInfo, field_list: list, source: str = "SFTP Downloader"):
    """Adds fields to RecordInfo according to list

    :param rec_info: RecordInfo object to which fields are to be added
    :type rec_info: Sdk.RecordInfo
    :param field_list: List of dict() containing field information; each
        entry is read through its 'name', 'type', 'size' and 'description' keys
    :type field_list: list
    :param source: Value passed as ``source`` for every added field
    :type source: str
    """
    for spec in field_list:
        rec_info.add_field(
            spec['name'],
            spec['type'],
            spec['size'],
            source=source,
            description=spec['description'],
        )
def test_raw_record_container_construction():
    """Constructing the container from two layouts alone must raise ValueError."""
    layout = RecordInfo(AlteryxEngine())
    for column_name, column_type in (
        ("a", FieldType.byte),
        ("b", FieldType.fixeddecimal),
    ):
        layout.add_field(field_name=column_name, field_type=column_type)

    # The same layout is passed twice and no third argument is supplied.
    with pytest.raises(ValueError):
        RawRecordContainer(layout, layout)
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Require an incoming 'Event' field and create the Event Hubs client.

    Returns False after showing an error when the incoming layout has no
    'Event' field. Otherwise initialises the parent's output with
    ``self.RecordInfo``, builds a blob checkpoint store and an
    EventHubConsumerClient from the parent's connection settings, stores the
    client on ``self.Client`` and returns True.
    """
    parent = self.parent

    self.EventField = record_info_in.get_field_by_name('Event', throw_error=False)
    if self.EventField is None:
        parent.display_error_msg("Incoming data source must contain an 'Event' text field that pushes 'Start' and 'End' events")
        return False

    parent.Output.init(self.RecordInfo)

    # The checkpoint store is handed to the consumer client below.
    store = BlobCheckpointStore.from_connection_string(
        parent.CheckpointConnStr,
        parent.CheckpointContainer,
    )
    self.Client = EventHubConsumerClient.from_connection_string(
        parent.EventHubsConnStr,
        consumer_group=parent.ConsumerGroup,
        eventhub_name=parent.EventHubName,
        checkpoint_store=store,
    )

    parent.display_info_msg("Event Hubs receive client created")
    return True
def test_field_proxy():
    """FieldProxy exposes the field name and round-trips values and nulls."""
    layout = RecordInfo(AlteryxEngine())
    field_spec = dict(
        field_name="float",
        field_type=FieldType.float,
        size=1,
        scale=1,
        source="test.txt",
        description="Test description",
    )
    layout.add_field(**field_spec)

    proxy = FieldProxy(layout.fields[0])
    creator = RecordCreator(layout)

    assert proxy.name == "float"

    # A numeric string is read back as a float.
    proxy.set(creator, "10.0")
    assert proxy.get(creator.finalize_record()) == 10.0

    # The null placeholder is read back as None.
    proxy.set(creator, NULL_VALUE_PLACEHOLDER)
    assert proxy.get(creator.finalize_record()) is None

    # Explicitly nulling the field gives the same result.
    proxy.set_null(creator)
    assert proxy.get(creator.finalize_record()) is None
def generate_records_from_df(
    df: "pd.DataFrame", record_info: RecordInfo
) -> Generator[RecordCreator, None, None]:
    """Generate record creators from a dataframe.

    ``fill_df_nulls_with_blackbird_nulls`` is applied to ``df`` first. One
    record creator is built from ``record_info`` and reused for every row:
    it is reset, filled with the row's values and yielded, so the yielded
    object is always the same instance holding the latest row. Every
    dataframe column name must match a field in ``record_info``; otherwise
    the field lookup raises KeyError.
    """
    fill_df_nulls_with_blackbird_nulls(df)

    column_names = list(df)
    proxies_by_name = {field.name: FieldProxy(field) for field in record_info}
    ordered_proxies = [proxies_by_name[name] for name in column_names]

    record_creator = record_info.construct_record_creator()

    for row in df.itertuples():
        record_creator.reset()
        # itertuples() puts the index at position 0, so values start at 1.
        for position, proxy in enumerate(ordered_proxies, start=1):
            proxy.set(record_creator, row[position])
        yield record_creator
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Resolve the two text fields and add the score column to the output.

    Stores the incoming layout, the fields named by the parent's ``Text1``
    and ``Text2`` settings, and a cloned layout extended with a double
    column named by ``OutputField`` (its source set to the parent's
    ``label``). Also builds the record creator and an index-for-index
    copier, then initialises the parent's output. Always returns True.
    """
    parent = self.parent

    self.InInfo = record_info_in
    self.Text1Field = record_info_in.get_field_by_name(parent.Text1)
    self.Text2Field = record_info_in.get_field_by_name(parent.Text2)

    # Outgoing layout: incoming columns followed by the score column.
    layout_out = record_info_in.clone()
    self.OutInfo = layout_out
    self.ScoreField = layout_out.add_field(
        parent.OutputField,
        Sdk.FieldType.double,
        source=parent.label,
    )

    self.Creator = layout_out.construct_record_creator()

    # Copier that maps every incoming column onto the same output position.
    copier = Sdk.RecordCopier(layout_out, record_info_in)
    self.Copier = copier
    for position in range(record_info_in.num_fields):
        copier.add(position, position)
    copier.done_adding()

    parent.Output.init(layout_out)
    return True
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Require an incoming 'Event' field and open the RabbitMQ consumer.

    Returns False after showing an error when the incoming layout has no
    'Event' field. Otherwise initialises the parent's output with
    ``self.RecordInfo``, stores the asyncio event loop, opens a blocking
    pika connection to the parent's ``Host``, declares the parent's
    ``Queue`` and registers ``self._push_event`` as its auto-acknowledging
    consumer callback, then returns True.
    """
    parent = self.parent

    self.EventField = record_info_in.get_field_by_name('Event', throw_error=False)
    if self.EventField is None:
        parent.display_error_msg(
            "Incoming data source must contain an 'Event' text field that pushes 'Start' and 'End' events"
        )
        return False

    parent.Output.init(self.RecordInfo)
    self.Loop = asyncio.get_event_loop()

    connection_params = pika.ConnectionParameters(host=parent.Host)
    self.Connection = pika.BlockingConnection(connection_params)

    channel = self.Connection.channel()
    self.Channel = channel
    channel.queue_declare(parent.Queue)
    channel.basic_consume(
        queue=parent.Queue,
        on_message_callback=self._push_event,
        auto_ack=True,
    )

    parent.display_info_msg("RabbitMQ connection created")
    return True
def test_generate_records_from_df():
    """Every dataframe row must be generated and carry the dataframe's values."""
    df = pd.DataFrame({"a": [1, 2, 3], "b": ["Hello", "from", "blackbird"]})

    record_info = RecordInfo(AlteryxEngine())
    record_info.add_field(field_name="a", field_type=FieldType.byte)
    record_info.add_field(field_name="b", field_type=FieldType.v_string)

    record_generator = generate_records_from_df(df, record_info)
    num_rows, _ = df.shape

    generated = 0
    for row, record in enumerate(record_generator):
        finalized = record.finalize_record()
        for column in list(df):
            assert finalized.data[column] == df[column][row]
        generated += 1

    # Without this check the loop above passes vacuously when the generator
    # yields fewer records than the dataframe has rows (including none).
    assert generated == num_rows
def ii_init(self, record_info_in: Sdk.RecordInfo) -> bool:
    """Build a reordered output layout and the copier that fills it.

    The incoming field names are passed to ``sort_fields`` together with the
    parent's ``SortList`` and ``Alphabetical`` settings. For every returned
    translation the incoming field of that name is added to a fresh output
    layout and a copier mapping from ``index_from`` to ``index_to`` is
    registered. The layout is then announced on the parent's output and a
    record creator for it is stored. Always returns True.
    """
    # Names of the incoming fields, in their incoming order.
    fields: List[str] = [field.name for field in record_info_in]

    # The output layout starts empty; fields are added in translation order.
    record_info_out = Sdk.RecordInfo(self.parent.alteryx_engine)
    self.record_copier = Sdk.RecordCopier(record_info_out, record_info_in)

    translations = sort_fields(fields, self.parent.SortList, self.parent.Alphabetical)
    for translation in translations:
        # The returned field is deliberately discarded: binding it to
        # ``Sdk.Field`` would overwrite that attribute on the SDK module.
        record_info_out.add_field(record_info_in.get_field_by_name(translation.name))
        self.record_copier.add(translation.index_to, translation.index_from)
    self.record_copier.done_adding()

    # Lets the downstream tools know what the outgoing record metadata will look like
    self.parent.output.init(record_info_out)

    # Record creator based on the reordered output layout.
    self.record_creator = record_info_out.construct_record_creator()
    return True
def test_simple_raw_record_container():
    """Exercise add, dataframe update, error cases and clear on the container."""
    layout = RecordInfo(AlteryxEngine())
    layout.add_field(field_name="a", field_type=FieldType.byte)
    layout.add_field(field_name="b", field_type=FieldType.fixeddecimal)
    container = RawRecordContainer(layout)

    # Two input records with known values.
    records = []
    for a_value, b_value in ((123, 666.666), (456, 999.999)):
        record = RecordRef(layout)
        record.data["a"] = a_value
        record.data["b"] = b_value
        records.append(record)

    for record in records:
        container.add_record(record)

    # Stored records carry the same data as the inputs.
    for original, stored in zip(records, container.records):
        assert original.data == stored.finalize_record().data

    # Updating column "b" leaves column "a" untouched.
    replacement = pd.DataFrame({"b": [1000, 2000]})
    container.update_with_dataframe(replacement)
    expected_rows = ({"a": 123, "b": 1000}, {"a": 456, "b": 2000})
    for position, expected in enumerate(expected_rows):
        assert container.records[position].finalize_record().data == expected

    # A dataframe with a different number of rows is rejected.
    too_long = pd.DataFrame({"b": [1000, 2000, 3000]})
    with pytest.raises(ValueError):
        container.update_with_dataframe(too_long)

    # A dataframe with a column the layout does not have is rejected.
    unknown_column = pd.DataFrame({"d": [1000, 2000]})
    with pytest.raises(RuntimeError):
        container.update_with_dataframe(unknown_column)

    container.clear_records()
    assert len(container.records) == 0
def test_simple_parsed_record_container():
    """Records added to the container come back as an equivalent dataframe."""
    layout = RecordInfo(AlteryxEngine())
    layout.add_field(field_name="a", field_type=FieldType.byte)
    layout.add_field(field_name="b", field_type=FieldType.fixeddecimal)
    container = ParsedRecordContainer(layout)

    # Two input records with known values.
    records = []
    for a_value, b_value in ((123, 666.666), (456, 999.999)):
        record = RecordRef(layout)
        record.data["a"] = a_value
        record.data["b"] = b_value
        records.append(record)

    for record in records:
        container.add_record(record)

    built = container.build_dataframe()
    expected = pd.DataFrame({"a": [123, 456], "b": [666.666, 999.999]})
    assert built.equals(expected)
def test_mapped_raw_record_container():
    """Values land in the storage fields named by the field map."""
    input_layout = RecordInfo(AlteryxEngine())
    input_layout.add_field(field_name="a", field_type=FieldType.byte)
    input_layout.add_field(field_name="b", field_type=FieldType.fixeddecimal)

    # The storage layout uses different names and a different column order.
    storage_layout = RecordInfo(AlteryxEngine())
    storage_layout.add_field(field_name="c", field_type=FieldType.fixeddecimal)
    storage_layout.add_field(field_name="d", field_type=FieldType.byte)

    field_map = {"a": "d", "b": "c"}
    container = RawRecordContainer(input_layout, storage_layout, field_map)

    # Two input records with known values.
    records = []
    for a_value, b_value in ((123, 666.666), (456, 999.999)):
        record = RecordRef(input_layout)
        record.data["a"] = a_value
        record.data["b"] = b_value
        records.append(record)

    for record in records:
        container.add_record(record)

    for original, stored in zip(records, container.records):
        assert original.data["a"] == stored.finalize_record().data["d"]
        assert original.data["b"] == stored.finalize_record().data["c"]
def generate_interview_fields(self, record: Sdk.RecordInfo, source: str) -> List[Sdk.Field]:
    """Add the interview columns to ``record`` and return the created fields.

    Every column is added as ``v_wstring`` with the listed size, followed by
    the positional arguments ``0`` (presumably the scale), ``source`` and an
    empty string. The returned list follows the order of the table below.
    """
    # (column name, size) in output order.
    column_specs = (
        ("Glassdoor ID", 10),
        ("Company Name", 100),
        ("Date", 20),
        ("Title (Analyst Interview)", 256),
        ("Experience", 20),
        ("Offer", 20),
        ("Difficulty", 20),
        ("Getting an interview", 20),
        ("Application", 5000),
        ("Interview (description/verbatim)", 5000),
        ("Interview (Questions)", 5000),
    )
    return [
        record.add_field(column_name, Sdk.FieldType.v_wstring, size, 0, source, '')
        for column_name, size in column_specs
    ]
def generate_review_fields(self, record: Sdk.RecordInfo, source: str) -> List[Sdk.Field]:
    """Add the review columns to ``record`` and return the created fields.

    Every column is added as ``v_wstring`` with the listed size, followed by
    the positional arguments ``0`` (presumably the scale), ``source`` and an
    empty string. The returned list follows the order of the table below.
    """
    # (column name, size) in output order.
    column_specs = (
        ("Glassdoor ID", 10),
        ("Company Name", 100),
        ("Review Date", 20),
        ("Helpful (count)", 10),
        ("Title (of the review)", 256),
        ("Rating (out of 5)", 3),
        ("Current/ Past Employee", 10),
        ("Employee Title", 100),
        ("Employment Type", 100),
        ("Location", 100),
        ("Recommends", 10),
        ("Positive Outlook", 10),
        ("Approves of CEO", 10),
        ("Time Employed", 100),
        ("Pros", 5000),
        ("Cons", 5000),
        ("Advice to Management", 5000),
    )
    return [
        record.add_field(column_name, Sdk.FieldType.v_wstring, size, 0, source, '')
        for column_name, size in column_specs
    ]
def build_ayx_record_from_list(
    values_list: List[Any],
    metadata_list: List[dict],
    record_info: sdk.RecordInfo,
    record_creator: Optional[sdk.RecordCreator] = None,
) -> Tuple[object, object]:
    """
    Build one Alteryx record from a row of plain Python values.

    Each metadata entry is paired with the value at the same position and
    written into a record creator. The finalized record is returned together
    with the creator so the creator can be passed back in on the next call
    instead of being rebuilt.

    Parameters
    ----------
    values_list : List[Any]
        The values of a single row. Position 0 holds the value for the
        first column, and so on. It is indexed once per metadata entry.
    metadata_list : List[dict]
        (This might not be a list) One entry per column, read by position.
        Each entry is accessed through its ``name``, ``type``, ``size``,
        ``source`` and ``description`` attributes.
    record_info : object
        An Alteryx RecordInfo object. When it has no fields yet
        (``num_fields == 0``) the columns described by ``metadata_list``
        are added to it; otherwise it is used as is.
    record_creator : Optional[object]
        An optional Alteryx RecordCreator object. A truthy creator is reset
        and reused; otherwise a new one is constructed from ``record_info``.
        It is populated as a side effect of this function.

    Returns
    -------
    Tuple(object, object)
        First value in tuple: the result of ``finalize_record()`` on the
        record creator after every column value has been set.
        Second value in tuple: the record creator that was used, whether
        it was passed in or created here.
    """
    # Pair every metadata entry with the value at the same position.
    columns = []
    for position in range(len(metadata_list)):
        meta = metadata_list[position]
        columns.append(
            Column(
                meta.name,
                meta.type,
                meta.size,
                meta.source,
                meta.description,
                values_list[position],
            )
        )

    # An empty layout is populated from the metadata on first use.
    if record_info.num_fields == 0:
        for column in columns:
            add_output_column_to_record_info(column, record_info)

    # Reuse the caller's creator when one was supplied.
    if not record_creator:
        record_creator = record_info.construct_record_creator()
    else:
        record_creator.reset()

    for column in columns:
        target_field = record_info.get_field_by_name(column.name)
        set_field_value(target_field, column.value, record_creator)

    return record_creator.finalize_record(), record_creator