def test_ts(self):
    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    # consumer = SimpleConsumer(kafka, "my-group112", "test")
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                              fetch_size_bytes=3000000,
                              buffer_size=2000000000,
                              max_buffer_size=2000000000)
    while True:
        print("HELLO")
        # Prepare data for insert and copy to S3
        # data_str = StringIO()
        count = 0
        # last_offset = 2
        consumer.seek(2, 0)
        for message in consumer.get_messages(count=100, block=False, timeout=0.1):
            count += 1
            print(message.message.value)
            # # Write tweets to StringIO
            # self.write_to_data_str(message, data_str)
        # # Store batch tweets to S3
        # self.write_to_s3(data_str, last_offset)
        if count != 100:
            break
def load_data_to_s3(self, photo_data):
    data_str = StringIO()
    ordering = ["pid", "yymmddhh", "word", "url"]
    for data in photo_data:
        row_arr = []
        for field in ordering:
            val = data[field] if data[field] else None
            if val is None:
                row_arr.append('\N')
            else:
                row_arr.append(unicode(val))
        data_str.write('\007'.join(row_arr).encode('utf-8') + '\n')

    # Copy data for load to S3
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    s3_file = Key(bucket)
    s3_file.key = self.S3_KEY
    data_str.seek(0)
    s3_file.set_contents_from_file(data_str)
def https_open(self, req):
    ca_certs = config.get('http.ca_certs_file', DEFAULT_CA_CERTS)
    if config.get('http.verify_server_certificates', True) and os.path.exists(ca_certs):
        frags = urlparse.urlparse(req.get_full_url())
        ssl.get_server_certificate((frags.hostname, frags.port or 443), ca_certs=ca_certs)
    return self.do_open(httplib.HTTPSConnection, req)
def get_socket(self):
    '''
    Creates and connects a new socket, or returns an existing one if this
    method was called previously.

    Returns a (protocol, socket) tuple, where protocol is either 'tcp' or
    'udp'. If the returned socket is None, the operation failed and details
    were logged.
    '''
    if self.sock is not None:
        return (self.proto, self.sock)

    proto = config.get('statsd.protocol', 'udp')
    self.proto = proto
    self.host = config.get('statsd.host', None)
    self.port = config.get('statsd.port', 8125)

    if self.host is None or self.port is None:
        return (self.proto, None)

    if (self.next_retry is not None) and (self.next_retry > time.time()):
        return (self.proto, None)

    if proto == 'udp':
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        log.debug('Created udp statsd socket')
        return (proto, self.sock)

    if proto == 'tcp':
        if self.host is None or not isinstance(self.port, int):
            log.error('Invalid TCP statsd config: host=%r port=%r', self.host, self.port)
            self.sock = None
        else:
            try:
                self.sock = socket.create_connection(address=(self.host, self.port), timeout=4.0)
                log.debug('Connected tcp statsd socket to %s:%i', self.host, self.port)
                # A successful connection resets the retry backoff to 0.5 seconds
                self.next_retry = None
                self.backoff = 0.5
            except socket.error:
                log.exception('Cannot open tcp stats socket %s:%i', self.host, self.port)
                self.sock = None
                # Every time a connection fails, we add 25% of the backoff value
                # We cap this at max_backoff so that we guarantee retries after
                # some period of time
                if self.backoff > self.max_backoff:
                    self.backoff = self.max_backoff
                log.warning('Unable to connect to statsd, not trying again for %.03f seconds', self.backoff)
                self.next_retry = (time.time() + self.backoff)
                self.backoff *= 1.25
        return (proto, self.sock)

    log.warning('Unknown protocol configured for statsd socket: %s', proto)
    return (proto, None)
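A minimal caller sketch for `get_socket` above, assuming the surrounding statsd client object also exposes `host`, `port`, and `sock`; the `send_counter` helper and the metric payload format are hypothetical additions, not part of the original code:

import socket

def send_counter(client, metric, value=1):
    # Hypothetical caller; `client` is assumed to expose get_socket(), host, port and sock.
    proto, sock = client.get_socket()
    if sock is None:
        return False  # statsd unreachable or still in retry backoff; details already logged
    payload = ('%s:%d|c' % (metric, value)).encode('utf-8')
    try:
        if proto == 'udp':
            sock.sendto(payload, (client.host, client.port))
        else:  # 'tcp' sockets are already connected
            sock.sendall(payload + b'\n')
        return True
    except socket.error:
        client.sock = None  # drop the socket so the next call reconnects
        return False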
def get_list_of_events(self, date, event_types):
    data = {}
    eventbrite = EventBriteRest(
        rest_endpoint=config.get("eventbrite.endpoint_rest"),
        token=config.get("eventbrite.token"))
    start_time = (date + timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ')
    end_time = (date + timedelta(days=2)).strftime('%Y-%m-%dT%H:%M:%SZ')

    # Search for each event type
    for event_type in event_types:
        result = eventbrite.search_events_by(event_type, start_time, end_time, 1)

        # Determine the number of events returned from the API
        event_count = result["pagination"]["object_count"]
        page_count = result["pagination"]["page_count"]

        # If there is data
        if event_count > 0:
            data[event_type] = []

            # Get data from each page
            for i in xrange(1, page_count + 1):
                result = eventbrite.search_events_by(event_type, start_time, end_time, i)

                # Append data into array
                for event in result["events"]:
                    data[event_type].append(event)
    return data
def check_stream(self):
    api = TwitterAPI(consumer_key=config.get("twitter.consumer_key"),
                     consumer_secret=config.get("twitter.consumer_secret"),
                     access_token_key=config.get("twitter.access_token"),
                     access_token_secret=config.get("twitter.access_token_secret"))
    while True:
        tweeter_stream = api.request('statuses/filter', {'locations': "-123.66,32.54,-113.77,39.57,-93.82,24.32,-65.08,47.84"})
        # tweeter_stream = api.request('statuses/filter', {'locations': self.get_geo_str()})
        # print(self.get_geo_str())
        start_time = time.time()
        # print("len")
        # print((tweeter_stream.text))
        # print((tweeter_stream.stream))

        # Stream data
        for tweet in tweeter_stream:
            # Break out of the for loop at the specified interval to query a new set of geo coordinates
            if time.time() > start_time + self.REFRESH_INTERVAL:
                print("breaktime")
                break
            # Publish tweets to Kafka
            print(tweet)
def get_conference_pictures(self, keyword):
    json_data = []

    # Cassandra initialization
    cluster = Cluster([config.get("cassandra.host1"), config.get("cassandra.host2")])
    session = cluster.connect('insight')

    # Instagram initialization
    instagram_api = InstagramAPI(client_id=config.get("instagram.client_id"),
                                 client_secret=config.get("instagram.client_secret"))

    yymmdd = datetime.utcnow().strftime('%y') + datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d')
    rows = session.execute(self.TOP_10_QUERY % (yymmdd))
    for (yymmdd, count, word) in rows:
        img_arr = []
        popular_media = instagram_api.media_popular(count=20)
        for media in popular_media:
            img_arr.append(media.images['standard_resolution'].url)
        json_data.append({"word": word, "count": count, "pic_url": img_arr})
    return json_data
def __init__(self): self.conn_opts = dict( host=config.get("redshift_db.host"), port=config.get("redshift_db.port"), user=config.get("redshift_db.user"), password=config.get("redshift_db.password"), database=config.get("redshift_db.db") )
def __init__(self): self.conn_opts = dict( host=config.get("mysql_db.host"), port=config.get("mysql_db.port"), user=config.get("mysql_db.user"), passwd=config.get("mysql_db.password"), db=config.get("mysql_db.db") )
def run(self):
    cluster = Cluster([config.get("cassandra.host1"), config.get("cassandra.host2")])
    session = cluster.connect('insight')
    print(session.execute("""describe tables"""))
def clean_s3_files(self):
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    for key in bucket.list(self.S3_KEY):
        bucket.delete_key(key)
def https_open(self, req):
    ca_certs = config.get('http.ca_certs_file', DEFAULT_CA_CERTS)
    if config.get('http.verify_server_certificates', True) and os.path.exists(ca_certs):
        frags = urlparse(req.get_full_url())
        ssl.get_server_certificate((frags.hostname, frags.port or 443), ca_certs=ca_certs)
    return self.do_open(http.client.HTTPSConnection, req)
def test_insta(self, options=None):
    api = InstagramAPI(client_id=config.get("instagram.client_id"),
                       client_secret=config.get("instagram.client_secret"))
    popular_media = api.media_popular(count=20)
    for media in popular_media:
        print(media.images['standard_resolution'].url)
def sendmail(mailto, subject, message, subtype='html', charset='utf-8',
             smtpconfig=None, attachments={}, use_starttls=False, **headers):
    '''
    Send an email to the given address. Additional SMTP headers may be
    specified as keyword arguments.
    '''
    if not smtpconfig:
        # we support both smtp and mail for legacy reasons
        # smtp is the correct usage.
        smtpconfig = config.get('smtp') or config.get('mail')

    # mailto arg is explicit to ensure that it's always set, but it's processed
    # mostly the same way as all other headers
    headers['To'] = _string_or_list(mailto)

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    for key, value in six.iteritems(headers):
        for val in _string_or_list(value):
            msg.add_header(key, val)
    text = MIMEText(message, subtype, charset)
    msg.attach(text)

    # Add attachments
    for file_name, file_payload in attachments.items():
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(file_payload.encode(charset))
        Encoders.encode_base64(part)
        part.add_header('Content-Disposition', 'attachment; filename="%s"' % file_name)
        msg.attach(part)

    if 'From' not in msg:
        msg['From'] = smtpconfig.get('from')
    mailfrom = msg['From']
    assert isinstance(mailfrom, six.string_types)

    recipients = []
    for toheader in ('To', 'CC', 'BCC'):
        recipients += msg.get_all(toheader, [])
    if 'BCC' in msg:
        del msg['BCC']

    smtp = smtplib.SMTP(smtpconfig.get('host'), smtpconfig.get('port'))
    if smtpconfig.get('username', None) is not None and smtpconfig.get('password', None) is not None:
        if use_starttls:
            smtp.ehlo()
            smtp.starttls()
            smtp.ehlo()
        smtp.login(smtpconfig.get('username'), smtpconfig.get('password'))
    smtp.sendmail(mailfrom, recipients, msg.as_string())
    smtp.quit()
    log.info('Sent email to %s (Subject: %s)', recipients, subject)
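A hedged usage example for the attachment-capable `sendmail` variant above; the addresses, subject, and CSV payload are made up, and it assumes `attachments` maps file names to already-read string payloads, as the `attachments.items()` loop implies:

# Hypothetical values throughout; only the keyword names come from the function signature.
report_csv = "date,events\n2015-02-04,42\n"
sendmail(
    mailto=['ops@example.com', 'data@example.com'],
    subject='Nightly batch report',
    message='<p>See attached CSV.</p>',
    attachments={'report.csv': report_csv},
    use_starttls=True,            # upgrades the SMTP session before login
    CC='oncall@example.com',      # extra headers pass through **headers
)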
def get_updates(self, timestamp):
    start_time = time.time()
    d_time = datetime.fromtimestamp(long(timestamp.encode("utf-8")) / 1000)
    timestamp = convert_time_to_utc(d_time)

    # Cassandra initialization
    cluster = Cluster([config.get("cassandra.host1"), config.get("cassandra.host2")])
    session = cluster.connect('insight')

    t_yymmddhh = datetime.utcnow().strftime('%y') + datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%H')
    print(self.QUERY_REAL_TIME_TWEETS % (t_yymmddhh, timestamp))
    tweeted_words = session.execute(self.QUERY_REAL_TIME_TWEETS % (t_yymmddhh, timestamp))
    # tweeted_words = session.execute(self.QUERY_REAL_TIME_TWEETS % "15020416")

    tweet_data = {}
    for (yymmddhh, timestamp, lat, lng, data) in tweeted_words:
        print("data")
        lat = round(float(lat), 2)
        lng = round(float(lng), 2)
        location = str(lat) + "," + str(lng)

        # Initialise the record for this location the first time it is seen
        if location not in tweet_data:
            tweet_data[location] = {"words": [], "tweets": 0}

        # Get words from the cassandra column
        word_count_pairs = [pair for pair in data.split(":")]
        for pair in word_count_pairs:
            data = pair.split(",")
            word = data[0].encode("utf-8")
            count = int(data[1].encode("utf-8"))
            if word == "OVERALL_CNT":
                tweet_data[location]["tweets"] += count
                continue
            tweet_data[location]["words"].append({"word": word, "count": count})

    # Delete misc data
    if "0.0,0.0" in tweet_data:
        tweet_data.pop("0.0,0.0")

    print(tweet_data)
    print("Realtime STREAMING API exec time for Instance is " + str(time.time() - start_time))
    return tweet_data
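The parsing loop above implies that the Cassandra `data` column packs counts as colon-separated `word,count` pairs with a special `OVERALL_CNT` entry; a toy illustration of that assumed format (the words and numbers are invented):

# Illustrative only: the packed-string format is inferred from the parsing loop above.
data = "python,5:kafka,3:OVERALL_CNT,8"
tweets, words = 0, []
for pair in data.split(":"):
    word, count = pair.split(",")
    if word == "OVERALL_CNT":
        tweets += int(count)
    else:
        words.append({"word": word, "count": int(count)})
# tweets == 8, words == [{'word': 'python', 'count': 5}, {'word': 'kafka', 'count': 3}]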
def run(self, options=None):
    # try:
    # Create table if it doesn't exist in the database
    if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
        self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)

    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                              fetch_size_bytes=3000000,
                              buffer_size=2000000000,
                              max_buffer_size=2000000000)

    while True:
        # Prepare data for insert and copy to S3
        data_str = StringIO()
        csv_str = StringIO()
        count = 0

        # Get offset from previous read
        s3_last_offset = self.get_s3_offset()
        last_offset = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
        last_offset = last_offset if last_offset else 0

        # Resolve difference in offset (s3 offset does not carry over from day to day)
        if s3_last_offset > last_offset:
            last_offset = s3_last_offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

        print(last_offset)

        # Read from offset
        consumer.seek(last_offset, 0)
        for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):
            # Write tweets to StringIO
            self.write_to_data_str(message, data_str, csv_str)
            count += 1
            last_offset += 1

        # Store batch tweets to S3
        self.write_to_s3(data_str, csv_str, last_offset)

        # Track Kafka offset
        self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

        if count != self.BATCH_SIZE:
            break
def get_and_load_words_to_s3(self, i):
    data_str = StringIO()
    # ordering = ["wid", "words", "latitude", "longitude", "count", "created_at"]
    timestamp = datetime.utcnow().strftime('%y') + datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%H')

    tweeted_words = self.CASSANDRA.execute(self.QUERY_TODAYS_TWEET % timestamp)
    # tweeted_words = self.CASSANDRA.execute(self.QUERY_TODAYS_TWEET % '15013006')

    for (yymmddhh, location, string, data, timestamp) in tweeted_words:
        loc = location.split(",")
        lat = float(loc[0])
        lng = float(loc[1])
        # word_count_pairs = [pair for pair in string.split(":")]
        for word, count in data.iteritems():
            i += 1
            row_arr = []
            word = word.encode("utf-8")
            if word == "OVERALL_CNT":
                continue
            row_arr = [unicode(i), unicode(word), unicode(lat), unicode(lng),
                       unicode(count), unicode(datetime.utcnow())]
            data_str.write('\007'.join(row_arr).encode('utf-8') + '\n')

    # Copy data for load to S3
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    s3_file = Key(bucket)
    s3_file.key = self.S3_KEY
    data_str.seek(0)
    s3_file.set_contents_from_file(data_str)
def test_s3(self, options=None):
    s3_key = "eventbrite/" + self.TABLE_NAME + "_" + str(calendar.timegm(datetime.utcnow().timetuple())) + ".txt"
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    line_buff = StringIO()
    line_buff.write("Hello World MORE AND MORE STUFF")
    s3_file = Key(bucket)
    s3_file.key = s3_key
    line_buff.seek(0)
    s3_file.set_contents_from_file(line_buff)
def fetch_photos_for_conference(self, word_data, last_id):
    bag_of_words = []
    photo_data = []
    flickr = FlickrRest(rest_endpoint=config.get("flickr.endpoint_rest"),
                        api_key=config.get("flickr.api_key"))
    yymmddhh = datetime.utcnow().strftime('%y') + datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%H')

    db_data = self.REDSHIFT.select(self.QUERY_RS_EVENTS)
    for row in db_data:
        if row["latitude"] is None and row["longitude"] is None:
            continue
        location = ("%.2f" % row["latitude"]) + "," + ("%.2f" % row["longitude"])
        if location not in word_data:
            continue
        for word, count in word_data[location].iteritems():
            if word == "OVERALL_CNT" or word in bag_of_words:
                continue
            if len(word) == 0:
                continue
            for url in flickr.get_photos_by_keyword(word):
                # Increment last_id
                last_id += 1
                data = {}
                data["pid"] = last_id
                data["yymmddhh"] = yymmddhh
                data["word"] = word
                data["url"] = url
                photo_data.append(data)
            bag_of_words.append(word)
    return photo_data
def load_data_to_s3(self, data):
    # Prepare data for insert and copy to S3
    data_str = StringIO()
    ordering = ["eb_id", "url", "logo_url", "event_name", "event_type", "start_time_utc",
                "end_time_utc", "ev_created_at", "ev_updated_at", "capacity", "online_event",
                "venue_id", "venue_name", "latitude", "longitude", "category", "created_at"]

    # Get all data from all given categories
    for key in data:
        for event in data[key]:
            event_data = {}
            event_data["eb_id"] = event[unicode("id")].encode("utf-8") if event[unicode("id")] else 0
            event_data["url"] = event[unicode("url")].encode("utf-8") if event[unicode("url")] else None
            event_data["logo_url"] = event[unicode("logo_url")].encode("utf-8") if event[unicode("logo_url")] else None
            event_data["event_name"] = (event[unicode("name")][unicode("text")].strip().encode("utf-8")
                                        if event[unicode("name")] and event[unicode("name")][unicode("text")] else None)
            event_data["event_type"] = (event[unicode("format")][unicode("name_localized")].encode("utf-8")
                                        if event[unicode("format")] and unicode("name_localized") in event[unicode("format")] else None)
            event_data["start_time_utc"] = datetime.strptime(event[unicode("start")][unicode("utc")], '%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M:%S')
            event_data["end_time_utc"] = datetime.strptime(event[unicode("end")][unicode("utc")], '%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M:%S')
            event_data["ev_created_at"] = datetime.strptime(event[unicode("created")], '%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M:%S')
            event_data["ev_updated_at"] = datetime.strptime(event[unicode("changed")], '%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M:%S')
            event_data["capacity"] = int(event[unicode("capacity")]) if event[unicode("capacity")] else 0
            event_data["online_event"] = event[unicode("online_event")].encode("utf-8") if event[unicode("online_event")] else False
            event_data["venue_id"] = int(event[unicode("venue_id")]) if event[unicode("venue_id")] else -1
            event_data["venue_name"] = (event[unicode("venue")][unicode("name")].encode("utf-8")
                                        if event[unicode("venue")] and unicode("name") in event[unicode("venue")] and event[unicode("venue")][unicode("name")] else None)
            event_data["latitude"] = (float(event[unicode("venue")][unicode("latitude")])
                                      if event[unicode("venue")] and unicode("latitude") in event[unicode("venue")] else 0)
            event_data["longitude"] = (float(event[unicode("venue")][unicode("longitude")])
                                       if event[unicode("venue")] and unicode("longitude") in event[unicode("venue")] else 0)
            event_data["category"] = key.encode("utf-8")
            event_data["created_at"] = datetime.utcnow()

            row_arr = []
            for field in ordering:
                val = event_data[field] if event_data[field] else None
                if val is None:
                    row_arr.append('\N')
                else:
                    row_arr.append(unicode(val))
            data_str.write('\007'.join(row_arr).encode('utf-8') + '\n')

    # Copy data for load to S3
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    s3_file = Key(bucket)
    s3_file.key = self.S3_KEY
    data_str.seek(0)
    s3_file.set_contents_from_file(data_str)
def write_to_s3(self, data_str, csv_str, last_offset):
    mmddyy = datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%y')

    # Copy data for load to S3
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    s3_file = Key(bucket)

    s3_file.key = self.KAFKA_TOPIC + "/" + mmddyy + "/" + self.S3_KEY + "_offset_" + str(last_offset) + "_" + str(calendar.timegm(datetime.utcnow().timetuple())) + ".txt"
    data_str.seek(0)
    s3_file.set_contents_from_file(data_str)

    s3_file.key = self.KAFKA_TOPIC + "/" + mmddyy + "/" + self.S3_KEY + "_offset_" + str(last_offset) + "_" + str(calendar.timegm(datetime.utcnow().timetuple())) + ".csv"
    csv_str.seek(0)
    s3_file.set_contents_from_file(csv_str)

    s3_connection.close()
def sendmail(mailto, subject, message, subtype='html', charset='utf-8',
             smtpconfig=None, **headers):
    '''
    Send an email to the given address. Additional SMTP headers may be
    specified as keyword arguments.
    '''
    if not smtpconfig:
        # we support both smtp and mail for legacy reasons
        # smtp is the correct usage.
        smtpconfig = config.get('smtp') or config.get('mail')

    # mailto arg is explicit to ensure that it's always set, but it's processed
    # mostly the same way as all other headers
    headers['To'] = _string_or_list(mailto)

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    for key, value in six.iteritems(headers):
        for val in _string_or_list(value):
            msg.add_header(key, val)
    text = MIMEText(message, subtype, charset)
    msg.attach(text)

    if 'From' not in msg:
        msg['From'] = smtpconfig.get('from')
    mailfrom = msg['From']
    assert isinstance(mailfrom, six.string_types)

    recipients = []
    for toheader in ('To', 'CC', 'BCC'):
        recipients += msg.get_all(toheader, [])
    if 'BCC' in msg:
        del msg['BCC']

    smtp = smtplib.SMTP(smtpconfig.get('host'), smtpconfig.get('port'))
    if smtpconfig.get('username', None) is not None and smtpconfig.get('password', None) is not None:
        smtp.login(smtpconfig.get('username'), smtpconfig.get('password'))
    smtp.sendmail(mailfrom, recipients, msg.as_string())
    smtp.quit()
    log.info('Sent email to %s (Subject: %s)', recipients, subject)
def serve_web():
    parse_command_line()
    logger.info('App starting up')
    app = make_app()
    app.listen(config.get('server.port'))
    ioloop.IOLoop.current().start()
def test_ts(self):
    kafka = KafkaClient(config.get("kafka.url"))
    producer = SimpleProducer(kafka)
    api = TwitterAPI(consumer_key=config.get("twitter.consumer_key"),
                     consumer_secret=config.get("twitter.consumer_secret"),
                     access_token_key=config.get("twitter.access_token"),
                     access_token_secret=config.get("twitter.access_token_secret"))
    tweeter_stream = api.request('statuses/filter', {'locations': '-122.75,36.8,-121.75,37.8,-74,40,-73,41'})

    # Stream data
    for tweet in tweeter_stream:
        producer.send_messages("test", json.dumps(tweet))
        break
def get_todays_tweet(self):
    word_data, sorted_by_loc = {}, []
    timestamp = datetime.utcnow().strftime('%y') + datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%H')
    cassandra = Cluster([config.get("cassandra.host1"),
                         config.get("cassandra.host2"),
                         config.get("cassandra.host3")]).connect('insight')

    tweeted_words = cassandra.execute(self.QUERY_TODAYS_TWEET % timestamp)
    # tweeted_words = self.CASSANDRA.execute(self.QUERY_TODAYS_TWEET % '15020221')

    # for (yymmddhh, timestamp, data, lat, lng) in tweeted_words:
    for (yymmddhh, location, string, data, timestamp) in tweeted_words:
        if location not in word_data:
            word_data[location] = {}
        for word, count in data.iteritems():
            word = word.encode("utf-8")
            if word not in word_data[location]:
                word_data[location][word] = count
            else:
                word_data[location][word] += count

    for location in word_data:
        # Sort by word count in descending order
        sorted_by_loc = sorted(word_data[location].items(), key=operator.itemgetter(1))
        sorted_by_loc.reverse()

        # Re-enter sorted data
        word_data[location] = {}
        for word_pair in sorted_by_loc:
            word = word_pair[0]
            count = word_pair[1]
            word_data[location][word] = count
    return word_data
def get_s3_offset(self):
    # Get s3 bucket
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    s3_bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)

    mmddyy = datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%y')
    s3_key = self.KAFKA_TOPIC + "/" + mmddyy + "/"
    offset = [int(key.name.split("_")[2]) for key in s3_bucket.list(prefix=s3_key) if ".txt" in key.name]
    offset = offset if len(offset) else [0]
    s3_connection.close()
    return max(offset)
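Note that `key.name.split("_")[2]` only yields the offset when `self.S3_KEY` itself contains no underscores, given the key layout produced by `write_to_s3`; a small sketch of that assumption with purely illustrative values:

# Key layout produced by write_to_s3 (topic, date, and offset values illustrative):
#   "<KAFKA_TOPIC>/<mmddyy>/<S3_KEY>_offset_<last_offset>_<epoch>.txt"
name = "tweets/020415/batch_offset_1200_1423072800.txt"
parts = name.split("_")       # ["tweets/020415/batch", "offset", "1200", "1423072800.txt"]
offset = int(parts[2])        # 1200 -- valid only while the base key has no "_" of its own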
def select_s3(self, s3_key, select_sql, batch_size=None):
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    fp = StringIO()

    conn = self.get_conn()
    cursor = conn.cursor()
    cursor.execute(select_sql)
    ordering = map(lambda c: c[0], cursor.description)

    row_count = 0
    last_row = []
    for row in cursor:
        row_arr = []
        for val in row:
            if val is None:
                row_arr.append('\N')
            else:
                row_arr.append(unicode(val))
        str_row = (self.COL_DELIMITER.join(row_arr).replace(self.ROW_DELIMITER, '') + self.ROW_DELIMITER).encode('utf-8')
        fp.write(str_row)
        row_count = row_count + 1
        last_row = row
        if batch_size is not None and row_count >= batch_size:
            break

    s3_file = Key(bucket)
    s3_file.key = s3_key
    fp.seek(0)
    s3_file.set_contents_from_file(fp)
    conn.close()

    if batch_size is None:
        return None, None
    else:
        return row_count, dict(zip(ordering, last_row))
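A sketch of how the batched mode of `select_s3` might be driven; `db` is a hypothetical instance of this class, and the table name, resume column, and key naming are invented for illustration:

# Hypothetical driver: export a table to S3 in fixed-size batches, resuming from
# the last row returned by each call.
last_id = 0
batch_no = 0
while True:
    sql = "SELECT * FROM tweets WHERE id > %d ORDER BY id" % last_id
    key = "exports/tweets_batch_%05d.txt" % batch_no
    row_count, last_row = db.select_s3(key, sql, batch_size=50000)
    if not row_count:
        break                        # nothing was exported this round
    last_id = last_row["id"]         # resume point comes from the returned row dict
    batch_no += 1
    if row_count < 50000:
        break                        # short batch: the table is drained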
def create_db():
    connect = None
    try:
        connect = engine.connect()
        connect.execute('CREATE DATABASE IF NOT EXISTS ' + config.get('database.database'))
    finally:
        if connect:
            connect.close()
def drop_db():
    connect = None
    try:
        connect = engine.connect()
        connect.execute('DROP DATABASE IF EXISTS ' + config.get('database.database'))
    finally:
        if connect:
            connect.close()
def get_sentry_client():
    global client
    if client:
        return client
    dsn = config.get("sentry.url", None)
    if not dsn:
        return
    client = raven.Client(dsn=dsn)
    return client
def error_email(**kwargs):
    if kwargs.get('table_name', None) is None or kwargs.get('error', None) is None:
        return
    if config.get('debug.enabled') is False:
        subj_str = "Job Failure - %s" % kwargs.get('table_name')
        mail_str = "Job %s failed with error: %s" % (kwargs.get('table_name'), kwargs.get('error'))
        mail_str = mail_str + ' || Stack trace: %s' % kwargs.get('trace', None)
        mail.sendmail("*****@*****.**", subj_str, mail_str)
def test_sendmail_with_other_smtpconfig(self, mock_SMTP):
    mock_SMTP_instance = mock_SMTP.return_value
    mailto = '*****@*****.**'
    subject = 'This is another subject'
    message = 'This is another message'
    mail.sendmail(mailto, subject, message, smtpconfig=config.get('othersmtp'))
    args, kwargs = mock_SMTP_instance.sendmail.call_args
    from_header = config.get('othersmtp.from')
    self.assertEqual(from_header, args[0])
    self.assertIn(mailto, args[1])
    self.assertIn('To: %s' % mailto, args[2])
    self.assertIn('From: %s' % from_header, args[2])
    self.assertIn('Subject: %s' % subject, args[2])
    self.assertIn('Content-Type: text/html', args[2])
def drop_database(env='development', database_type='write'):
    """Drop the database."""
    os.environ['CLAY_CONFIG'] = './config/%s.yaml' % env
    db = config.get('database')
    db_info = db[database_type][0]
    db_name = db_info['dbname']
    print('Deleting %s database' % db_name)
    os.system('psql -q template1 -c "DROP DATABASE IF EXISTS %s";' % db_name)
def load_data_to_s3(self, stats_data):
    data_str = StringIO()
    ordering = ["tid", "eb_id", "event_name", "event_type", "latitude", "longitude",
                "start_time_utc", "end_time_utc", "tweets", "created_at"]

    for data in stats_data:
        row_arr = []
        for field in ordering:
            val = data[field] if data[field] else None
            if val is None:
                row_arr.append('\N')
            else:
                row_arr.append(unicode(val))
        for data_type in ["word", "count"]:
            for i in xrange(10):
                key = data_type + str(i + 1)
                if key not in data:
                    row_arr.append(unicode('\N'))
                else:
                    row_arr.append(unicode(data[key]))
        data_str.write('\007'.join(row_arr).encode('utf-8') + '\n')

    # Copy data for load to S3
    s3_connection = S3Connection(config.get('S3.access_key'), config.get('S3.secret'))
    bucket = s3_connection.get_bucket(config.get('S3.bucket'), validate=False)
    s3_file = Key(bucket)
    s3_file.key = self.S3_KEY
    data_str.seek(0)
    s3_file.set_contents_from_file(data_str)
def sendmail(mailto, subject, message, subtype='html', charset='utf-8',
             smtpconfig=None, **headers):
    '''
    Send an email to the given address. Additional SMTP headers may be
    specified as keyword arguments.
    '''
    if not smtpconfig:
        # we support both smtp and mail for legacy reasons
        # smtp is the correct usage.
        smtpconfig = config.get('smtp') or config.get('mail')

    # mailto arg is explicit to ensure that it's always set, but it's processed
    # mostly the same way as all other headers
    headers['To'] = _string_or_list(mailto)

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    for key, value in headers.iteritems():
        for val in _string_or_list(value):
            msg.add_header(key, val)
    text = MIMEText(message, subtype, charset)
    msg.attach(text)

    if 'From' not in msg:
        msg['From'] = smtpconfig.get('from')
    mailfrom = msg['From']
    assert isinstance(mailfrom, basestring)

    recipients = []
    for toheader in ('To', 'CC', 'BCC'):
        recipients += msg.get_all(toheader, [])
    if 'BCC' in msg:
        del msg['BCC']

    smtp = smtplib.SMTP(smtpconfig.get('host'), smtpconfig.get('port'))
    if smtpconfig.get('username', None) is not None and smtpconfig.get('password', None) is not None:
        smtp.login(smtpconfig.get('username'), smtpconfig.get('password'))
    smtp.sendmail(mailfrom, recipients, msg.as_string())
    smtp.quit()
    log.info('Sent email to %s (Subject: %s)', recipients, subject)
def devserver():
    if not config.get('debug.enabled', False):
        sys.stderr.write('This server must be run in development mode, set debug.enabled in your config and try again\n')
        return -1

    for modulename in config.get('views'):
        log.debug('Loading views from %s' % modulename)
        __import__(modulename)

    conf = config.get('debug.server')
    log.warning('DEVELOPMENT MODE')
    log.info('Listening on %s:%i' % (conf['host'], conf['port']))

    kwargs = {
        'use_reloader': True,
        'use_debugger': True,
        'use_evalex': True,
        'threaded': False,
        'processes': 1,
    }
    kwargs.update(config.get('debug.werkzeug', {}))
    werkzeug.serving.run_simple(conf['host'], conf['port'], application, **kwargs)
def test(self):
    start_time = time.time()

    # Cassandra initialization
    cluster = Cluster([config.get("cassandra.host1"), config.get("cassandra.host2")])
    session = cluster.connect('insight')

    timestamp = datetime.utcnow().strftime('%y') + datetime.utcnow().strftime('%m') + datetime.utcnow().strftime('%d') + datetime.utcnow().strftime('%H')
    tweeted_words = session.execute(self.QUERY_TWEETS % timestamp)
    tweeted_words = session.execute(self.QUERY_TEST)

    cnt = 0
    for (id, counter) in tweeted_words:
        print(counter)

    print(cnt)
    print("Realtime API exec time for Instance is " + str(time.time() - start_time))
    return "super"
def tweets(ncaaf_or_nfl):
    """Find all tweets relevant to NCAA football games in a given week."""
    sportsdata_key = config.get('sportsdata.key')
    access_token_key = config.get('twitter.access_token_key')
    access_token_secret = config.get('twitter.access_token_secret')
    consumer_key = config.get('twitter.consumer_key')
    consumer_secret = config.get('twitter.consumer_secret')

    if ncaaf_or_nfl not in ('ncaaf', 'nfl'):
        abort(400)
    try:
        request_info = request.json
    except AttributeError:
        abort(400)
    if not request_info:
        abort(400)

    year = request_info.get('year', None)
    week = request_info.get('week', None)
    if not (year and week):
        abort(400)

    htags = sportsdata.sportsdatareq(week, year, sportsdata_key, ncaaf_or_nfl)
    return twitter.fetchsamples(htags, access_token_key, access_token_secret, consumer_key, consumer_secret)
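A hedged example of exercising this handler over HTTP; the host and route prefix are assumptions (the URL rule is not shown above), while the `ncaaf` path segment and the `year`/`week` JSON fields come from the code:

import requests

# Hypothetical URL; only the trailing 'ncaaf' segment and the JSON fields are taken
# from the handler itself.
resp = requests.post(
    'http://localhost:8080/tweets/ncaaf',
    json={'year': 2014, 'week': 5},
)
print(resp.status_code, resp.text)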
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    This configures the context with just a URL and not an Engine, though an
    Engine is acceptable here as well. By skipping the Engine creation we
    don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the script output.
    """
    # This is special to Clay
    url = clay_config.get("database")['sqlalchemy.url']
    context.configure(url=url)
    with context.begin_transaction():
        context.run_migrations()