def seed_channel(token):
    """Fetch the Slack team for *token* and persist it as a Channel row.

    :param token: Slack API token used to authenticate the ``team.info`` call.
    :raises KeyError: if the Slack response lacks the expected ``team`` payload.
    """
    # BUG FIX: was a Python 2 `print` statement (SyntaxError on Python 3).
    print("seeding channel")
    # Let requests build the query string so the token is URL-encoded,
    # instead of interpolating it raw into the URL.
    response = requests.get(
        'https://slack.com/api/team.info',
        params={'token': token, 'pretty': 1},
    )
    js = response.json()
    team_id = js['team']['id']
    team_name = js['team']['domain']
    channel = Channel(
        channel_id=team_id,
        cohort_name=team_name,
    )
    db.session.add(channel)
    db.session.commit()
def add_channel(channel):
    """Suggest a channel for ingestion.

    Validates the name, resolves it via the Telethon API, and registers it
    unless it is already being ingested.

    :param channel: candidate channel name supplied by the caller.
    :return: Flask ``Response`` with JSON body ``{ok: bool, msg: str}``.
    """
    def _json_response(ok, msg, status):
        # All exits share this shape; build it in one place.
        return Response(
            response=json.dumps({
                "ok": ok,
                "msg": msg,
            }),
            status=status,
            content_type="application/json",
        )

    if not matches_channel_name(channel):
        return _json_response(False, "invalid channel", 400)

    j = loop.run_until_complete(telethon_api.get_channel_info(channel))
    if not j:
        return _json_response(False, "this channel does not exist", 404)
    if "full_chat" not in j or "id" not in j["full_chat"]:
        return _json_response(False, "Telethon API returned invalid channel", 500)

    existing = db.get_channel_by_id(j["full_chat"]["id"])
    if existing:
        return _json_response(False, "This channel is already being ingested", 400)

    # Single timestamp so updated_utc and retrieved_utc cannot straddle a
    # second boundary (the original called time.time() twice).
    now = int(time.time())
    db.upsert_channel(
        Channel(
            channel_id=j["full_chat"]["id"],
            channel_name=channel,
            updated_utc=now,
            retrieved_utc=now,
            min_message_id=0,
            max_message_id=0,
            # NOTE(review): is_active/is_complete semantics are not defined
            # here — presumably "active" marks the channel for ingestion and
            # "complete" flips once fully backfilled; confirm against db layer.
            is_active=True,
            is_complete=False,
        ))
    return _json_response(True, "ok", 200)
def post(self):
    """Handle a client connect notification: record the sender as a Channel."""
    sender = self.request.get('from')
    # NOTE(review): logged at ERROR level although this looks informational —
    # confirm whether that severity is intentional.
    logging.error('connected!: {}'.format(sender))
    entity = Channel(id=sender)
    entity.put()
def ingest_channel(channel_name: str, channel_id: int, stop_point: int = None):
    """Ingest messages for a channel, newest-first, into Postgres and Elasticsearch.

    Pages backwards through the channel in batches of ``BATCH_SIZE``, tracking
    the min/max message ids seen, and finally upserts the channel row with the
    observed id range.

    :param channel_name: human-readable channel name used for fetching/logging.
    :param channel_id: numeric channel id; cross-checked against each message.
    :param stop_point: if given, stop once a message id <= stop_point is seen
        (exclusive — that message itself is not ingested).
    """
    BATCH_SIZE = 250
    current_message_id = None   # cursor: lowest id ingested so far
    max_message_id = None
    min_message_id = None
    total_messages = 0
    seen_ids = set()
    stop_flag = False
    while True:
        es_records = []
        pg_records = []
        logger.debug(
            "Fetching %d ids (in descending order) from %s starting at id %s"
            % (BATCH_SIZE, channel_name, current_message_id))
        messages = telethon_api.fetch_messages(
            channel=channel_name,
            size=BATCH_SIZE,
            max_id=current_message_id,
        )
        retrieved_utc = int(time.time())
        for m in messages:
            message_id = m.id
            if stop_point and message_id <= stop_point:
                stop_flag = True
                break
            if message_id in seen_ids:
                # NOTE(review): duplicates are warned about but still
                # re-ingested below — confirm whether the inserts upsert.
                logger.warning("Message id %d was already ingested"
                               % (message_id, ))
            seen_ids.add(message_id)
            total_messages += 1
            if current_message_id is None or message_id < current_message_id:
                current_message_id = message_id
            if min_message_id is None or message_id < min_message_id:
                min_message_id = message_id
            if max_message_id is None or message_id > max_message_id:
                max_message_id = message_id
            message_channel_id = m.to_id.channel_id
            if message_channel_id != channel_id:
                # BUG FIX: the two adjacent string literals joined without a
                # space ("...does not matchexpected value...").
                logger.warning("Message channel id for %s does not match "
                               "expected value. %d != %d"
                               % (channel_name, message_channel_id, channel_id))
            record_id = (message_channel_id << 32) + message_id
            data = m.to_json()
            updated_utc = retrieved_utc
            es_records.append(
                translate_message_for_es(m, channel_name, retrieved_utc))
            pg_records.append(
                Message(
                    record_id=record_id,
                    message_id=message_id,
                    channel_id=channel_id,
                    retrieved_utc=retrieved_utc,
                    updated_utc=updated_utc,
                    data=data,
                ))
        db.insert_messages(pg_records)
        es.bulk_insert(es_records)
        if stop_flag:
            break
        # BUG FIX: without this the loop spins forever once the channel is
        # exhausted — an empty batch leaves the cursor unchanged and the only
        # other exit is stop_flag.
        if not messages:
            break
        time.sleep(1)  # TODO: rate limit decorator
    logger.debug("A total of %d messages were ingested for channel %s"
                 % (total_messages, channel_name))
    # TODO: Should we update this at every iteration?
    # This way if this crashes halfway through it can resume
    if total_messages > 0:
        db.upsert_channel(
            Channel(
                channel_id=channel_id,
                channel_name=channel_name,
                updated_utc=int(time.time()),
                retrieved_utc=int(time.time()),
                min_message_id=min_message_id,
                max_message_id=max_message_id,
                is_active=True,
                is_complete=True,
            ))