class SegmentEventService:
    """Thin wrapper around the Segment `Client` used to record event logs."""

    def __init__(self):
        # Large max_queue_size so bursts of events are buffered rather than dropped.
        self.segment = Client(ENV["segment"]["write_key"], debug=True,
                              on_error=self.on_error, send=True,
                              max_queue_size=100000)

    def on_error(self, error, items):
        """Callback invoked by the Segment client when a batch fails to send."""
        print("An error occurred:", error)

    #
    # insert an event log
    #
    def insert_event(self, event_log):
        """Forward one event-log dict to Segment.

        `event_log` is expected to carry the keys: userId, event, context,
        timestamp, anonymousId, integrations. Failures are logged, never raised,
        so event tracking cannot take down the caller.
        """
        try:
            # Positional args follow the Segment track() signature:
            # (user_id, event, properties, context, timestamp, anonymous_id, integrations)
            self.segment.track(event_log["userId"], event_log["event"], {},
                               event_log["context"], event_log["timestamp"],
                               event_log["anonymousId"], event_log["integrations"])
        # BUG FIX: `except Exception, e` is Python 2 syntax and a SyntaxError
        # on Python 3 — replaced with the `as` form.
        except Exception as e:
            log.exception(e)
def upload_to_segment():
    """Upload buffered audit events (JSON lines in ./auditing/*.txt) to Segment.

    Each `.txt` file is first renamed to `.toupload` (so a concurrent writer
    starts a fresh file), its lines are replayed as Segment `track` calls, and
    the file is finally renamed with a timestamp suffix to mark it as done.
    """
    dir_path = dirname(realpath(__file__))
    audit_file_path = join(dir_path, "auditing")
    client = Client(config.SEGMENT_API_KEY)
    pending = [
        join(audit_file_path, filename)
        for filename in listdir(audit_file_path)
        if filename.endswith(".txt")
    ]
    for txt_file in pending:
        # Claim the file before reading so new events land in a new .txt file.
        renamed_to = txt_file.replace(".txt", ".toupload")
        rename(txt_file, renamed_to)
        with open(renamed_to, "rb") as output_file:
            # Stream line by line instead of readlines() — avoids loading the
            # whole file into memory for large audit logs.
            for line in output_file:
                # Robustness: tolerate blank/trailing lines instead of crashing
                # the whole upload on json.loads("").
                if not line.strip():
                    continue
                properties = json.loads(line)
                # user/event/timestamp are promoted to track() arguments; the
                # remaining keys travel as event properties.
                user = properties.pop("user")
                event = properties.pop("event")
                timestamp = parse(properties.pop("timestamp"))
                client.track(
                    user_id=user,
                    event=event,
                    timestamp=timestamp,
                    properties=properties,
                )
            # Flush once per file so a later failure cannot lose this batch.
            client.flush()
        afterwards_to = renamed_to.replace(
            ".toupload", datetime.now().strftime(".%Y%m%d%H%M"))
        rename(renamed_to, afterwards_to)
def track():
    """Proxy endpoint: forward one track event to Segment.

    The Segment write key comes from the `writeKey` query parameter; the
    event payload (userId, event, properties) comes from the JSON body.
    Always responds with a success JSON blob.
    """
    write_key = request.args.get('writeKey')
    payload = request.get_json(True)

    # A fresh client per request: track the single event and push it out
    # immediately so nothing sits in the queue after the response.
    client = Client(write_key)
    client.track(payload['userId'], payload['event'], payload['properties'])
    client.flush()

    return json.dumps({'status': 'success'})
class SegmentClient:
    """Implements base Segment client that can be extended for domain-specific use-cases."""

    def __init__(self, write_key: str):
        # Outside GCP we are in local development: keep events on this machine
        # (send=False) and turn on the client's debug logging.
        running_in_gcp = in_gcp()
        self.client = Client(
            write_key,
            send=running_in_gcp,
            debug=not running_in_gcp,
        )

    def track(self, user_id: Optional[str], event_name: str,
              metadata: Dict[str, Any]) -> None:
        """Record `event_name` for `user_id`; a missing user_id is a no-op."""
        if not user_id:
            return
        self.client.track(user_id, event_name, metadata)
# Pull the predictions table from GCS into a dataframe.
logger.info("Reading data from GCS")
df = pd.read_csv(f"{gcs_directory}/tables_1.csv")
logger.info(f"Loaded {df.shape[0]} rows")

# Drop rows that cannot be attributed to an account — Segment events need
# an account_id as the track() identifier.
logger.info(f"Removing {df['account_id'].isna().sum()} rows with no account_id")
clean_df = df[df["account_id"].notna()]

# Emit one "Trial Conversion Predicted" event per remaining row, tagged with
# the model identity so downstream consumers can tell score versions apart.
logger.info("Sending data to Segment")
for index, row in clean_df.iterrows():
    properties = {
        "product": row["product"],
        "productUserId": row["user_id"],
        "subscriptionId": row["subscription_id"],
        "trialId": row["trial_id"],
        "planId": row["plan_id"],
        "modelName": MODEL_NAME,
        "modelVersion": MODEL_VERSION,
        "predictedAt": predicted_at,
        "score": row["converted_true_score"],
    }
    segment_client.track(
        row["account_id"],
        "Trial Conversion Predicted",
        properties,
    )

# Drain the client's queue before the script exits so no events are lost.
logger.info("Flushing queue")
segment_client.flush()
# NOTE(review): the chunk began with the bare body of the error callback; the
# `def` line was lost upstream. Reconstructed here to match how it is passed
# below (`on_error=on_error`) — confirm against the original file.
def on_error(error, items):
    """Callback invoked by the Segment client when a batch fails to send."""
    print("An error occurred:", error)


writeKey = os.getenv('writeKey', None)
# BUG FIX: os.getenv returns a *string* when the variable is set, which would
# make `range(events)` below raise TypeError. Coerce to int either way.
events = int(os.getenv('events', 500000))
assert writeKey is not None, "Please configure a write key using the writeKey environment variable"

print(f'Sending {events} events to write key "{writeKey}"')

# Unbounded queue/upload sizes on purpose: this is a throughput benchmark, so
# nothing should be dropped or chunked by the client.
analytics = Client(writeKey, debug=True, on_error=on_error,
                   max_queue_size=math.inf, upload_size=math.inf,
                   upload_interval=1)

start = time.time()
for it in range(events):
    sys.stdout.write(f"\rProgress: {round(it / events * 100, 1)}%")
    analytics.track('test', f'Iterated-{it}', {'plan': it})
print()
print('Shutting down..')
# shutdown() flushes the remaining queue, so it is included in the timing.
analytics.shutdown()
elapsed = time.time() - start
print(f'elapsed: {elapsed} seconds')
print(f'{round(events / elapsed)}rps average')