Beispiel #1
0
 def connect(cls, settings):
     """Configure the cqlengine connection from the 'global' configuration.

     Hosts, credentials and consistency are read from the ``cassandra.*``
     keys. The ``settings`` argument is accepted but not used here.
     """
     global_conf = Configuration('global')
     hosts = global_conf.get('cassandra.hosts', ['127.0.0.1:9160'])
     auth_and_consistency = dict(
         username=global_conf.get('cassandra.user'),
         password=global_conf.get('cassandra.password'),
         consistency=global_conf.get('cassandra.consistency', 'ONE'),
     )
     connection.setup(hosts, **auth_and_consistency)
Beispiel #2
0
def setup_connection():
    """Configure the cqlengine connection from the FEEDLY_* settings."""
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    driver_options = {
        'consistency': settings.FEEDLY_CASSANDRA_CONSISTENCY_LEVEL,
        'default_keyspace': settings.FEEDLY_DEFAULT_KEYSPACE,
        'metrics_enabled': settings.FEEDLY_TRACK_METRICS,
    }
    connection.setup(hosts, **driver_options)
Beispiel #3
0
def setup_connection():
    """Configure the cqlengine connection, including metrics and timeout."""
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    consistency = settings.FEEDLY_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    track_metrics = settings.FEEDLY_TRACK_CASSANDRA_DRIVER_METRICS
    timeout = settings.FEEDLY_CASSANDRA_DEFAULT_TIMEOUT
    connection.setup(
        hosts,
        consistency=consistency,
        default_keyspace=keyspace,
        metrics_enabled=track_metrics,
        default_timeout=timeout,
    )
Beispiel #4
0
    def run(self):
        """Connect to Cassandra and sync the ProductsDetails model's table.

        Progress markers are printed before connecting and after syncing.
        """
        # print() with one string argument behaves the same on Python 2 and 3.
        print('inside db')
        # NOTE(review): the first argument is passed as the hosts value;
        # confirm that self.database_ip is a list of hosts, not a bare string.
        connection.setup(self.database_ip, self.database_name)
        sync_table(products.ProductsDetails)
        print('synched')
Beispiel #5
0
def setup_connection():
    """Configure the cqlengine connection with an explicit pool size."""
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    pool_size = settings.FEEDLY_CASSANDRA_CONNECTION_POOL_SIZE
    # NOTE(review): "CONSITENCY" is how this setting name is spelled here;
    # confirm it matches the settings module before renaming anything.
    consistency = settings.FEEDLY_CASSANDRA_CONSITENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    connection.setup(
        hosts,
        max_connections=pool_size,
        consistency=consistency,
        default_keyspace=keyspace,
    )
Beispiel #6
0
 def connect(cls, settings):
     """Set up the cqlengine connection using the 'global' configuration.

     ``settings`` is part of the signature but is not read by this method.
     """
     cfg = Configuration('global')
     cassandra_hosts = cfg.get('cassandra.hosts', ['127.0.0.1:9160'])
     user = cfg.get('cassandra.user')
     secret = cfg.get('cassandra.password')
     level = cfg.get('cassandra.consistency', 'ONE')
     connection.setup(cassandra_hosts,
                      username=user,
                      password=secret,
                      consistency=level)
Beispiel #7
0
def setup_connection():
    """Configure cqlengine from the STREAM_* settings plus extra driver kwargs."""
    hosts = settings.STREAM_CASSANDRA_HOSTS
    consistency = settings.STREAM_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.STREAM_DEFAULT_KEYSPACE
    # The extra kwargs stay unpacked in the call so that a key duplicating one
    # of the explicit arguments still raises instead of silently overriding it.
    connection.setup(hosts=hosts,
                     consistency=consistency,
                     default_keyspace=keyspace,
                     **settings.CASSANDRA_DRIVER_KWARGS)
Beispiel #8
0
    def __init__(self, logger, config):
        """Build the application, connect to Cassandra and sync the models.

        Prints the route table, initialises the parent ``Application`` with
        the routes and settings, then sets up cqlengine from ``config`` and
        syncs each model table in turn.
        """
        self.logger = logger
        self.config = config

        routes = get_routes(movie)
        route_listing = json.dumps(
            [(url, repr(rh)) for url, rh in routes], indent=2)
        print("Routes\n======\n\n" + route_listing)

        # NOTE(review): the cookie secret is hard-coded in source (the original
        # TODO says to update it manually); consider loading it from config.
        settings = {
            'debug': options.debug,
            'xsrf_cookies': False,
            'cookie_secret': 'lpyoGs9/TAuA8IINRTRRjlgBspMDy0lKtvQNGrTnA9g=',
        }
        super(Application, self).__init__(routes=routes,
                                          generate_docs=True,
                                          settings=settings)

        # Connect to the configured cluster nodes and keyspace.
        connection.setup(config.clusterNodes,
                         config.clusterKeyspace,
                         protocol_version=2)

        # Sync each model with its CQL table; attributes are resolved one at a
        # time so the lookup/sync order matches a sequence of explicit calls.
        model_names = ('Users', 'ActorID', 'ActorFirst', 'ActorLast',
                       'ActorLastFirst', 'Movie')
        for model_name in model_names:
            sync_table(getattr(models, model_name))
Beispiel #9
0
def setup_package():
    """Connect to the test Cassandra node and create the test keyspace.

    Reads ``CASSANDRA_VERSION`` (required, integer) and
    ``CASSANDRA_TEST_HOST`` (optional, defaults to ``localhost``) from the
    environment. Versions below 20 use protocol version 1, others use 2.

    Raises:
        KeyError: if ``CASSANDRA_VERSION`` is not set.
        ValueError: if ``CASSANDRA_VERSION`` is not an integer.
    """
    try:
        cassandra_version = int(os.environ["CASSANDRA_VERSION"])
    except (KeyError, ValueError):
        # Only the two failures this lookup can produce are intercepted, so
        # unrelated exceptions (e.g. KeyboardInterrupt) pass through untouched.
        print("CASSANDRA_VERSION must be set as an environment variable. "
              "One of (12, 20, 21)")
        raise

    # An unset or empty host variable falls back to localhost.
    test_host = os.environ.get('CASSANDRA_TEST_HOST') or 'localhost'
    protocol_version = 1 if cassandra_version < 20 else 2

    connection.setup([test_host],
                     protocol_version=protocol_version,
                     default_keyspace='cqlengine_test')

    create_keyspace("cqlengine_test",
                    replication_factor=1,
                    strategy_class="SimpleStrategy")
def AddToCassandra_allcountsbatch_bypartition(d_iter):  # filter_missing_values=True for RDDs
    """Sync the ``url_ranks_links_23`` table and insert every item of ``d_iter``.

    Each item is unpacked as keyword arguments to ``create``, so it must be a
    mapping keyed by the model's column names.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    # An alternative keyspace ("wikipedia_jan_2015") was previously used here.
    CASSANDRA_KEYSPACE = "test"
    cluster_hosts = [
        "52.89.66.139",
        "52.89.34.7",
        "52.89.116.45",
        "52.89.78.4",
        "52.89.27.115",
        "52.89.133.147",
        "52.89.1.48",
    ]
    connection.setup(cluster_hosts, CASSANDRA_KEYSPACE)

    class url_ranks_links_23(Model):
        # The URL is the only primary-key column.
        url = columns.Text(primary_key=True)
        # Rank value for the URL (the original note says it is stored as a double).
        ranks = columns.Float()
        # List of link strings associated with the URL.
        links = columns.List(columns.Text)

        def __repr__(self):
            return "%s %s" % (self.url, self.ranks)

    sync_table(url_ranks_links_23)
    for record in d_iter:
        url_ranks_links_23.create(**record)
Beispiel #11
0
def global_setup(config):
    """Perform global configuration.

    In a given process this should only ever be called with a single
    configuration instance. A repeated call with an equal configuration is a
    no-op; a call with a different one raises an exception.
    """
    global _global_config

    if _global_config is not None:
        # Already configured: only a differing configuration is an error.
        if _global_config != config:
            raise Exception('global_setup called twice with different '
                            'configurations')
        return

    _global_config = config

    # Hosts are coerced to str because unicode values break the setup call.
    cluster_hosts = [str(v) for v in config.CASSANDRA_CLUSTER]
    connection.setup(cluster_hosts, consistency=config.CASSANDRA_CONSISTENCY)

    processors = [_capture_stack_trace, _format_event]
    if config.PRETTY_LOGGING:
        processors.append(structlog.processors.ExceptionPrettyPrinter())
        processors.append(structlog.processors.KeyValueRenderer())
    else:
        processors.append(structlog.processors.JSONRenderer())

    structlog.configure(processors=processors)
Beispiel #12
0
def _init_db_connection(**kwargs):
    """Set up the cqlengine connection and the SQLAlchemy session and tables.

    The connection is deliberately not established in ``__init__``; see

        http://www.dctrwatson.com/2010/09/python-thread-safe-does-not-mean-fork-safe/

    In short, a db-connection handle held by the parent process would be
    copied into the memory of the child process by fork.

    ``kwargs`` is accepted but not used in this function.
    """
    conf = Config()

    # This callback must not run longer than 4 seconds, or it will be
    # interrupted by celery.
    connection.setup(hosts=conf.CQLENGINE_HOSTS, default_keyspace=const.CQL_KEYSPACE_NAME)

    # SQLAlchemy: attach a session factory bound to the runtime's engine.
    rt = Runtime()
    rt.sql_session = sessionmaker(rt.sql_engine)

    # Imported only for its side effect: it triggers registration of the
    # SQLAlchemy models, so it must happen before create_all below.
    from models import sql

    # Create the tables for the registered models.
    Preparation().Base.metadata.create_all(rt.sql_engine)
Beispiel #13
0
 def setUp(self):
     """Connect to the local node, create the keyspace and sync test tables."""
     local_hosts = ['127.0.0.1']
     connection.setup(local_hosts, KEYSPACE)
     create_keyspace(KEYSPACE,
                     replication_factor=1,
                     strategy_class='SimpleStrategy')
     for model in (Avatar, Anchor, Message):
         sync_table(model)
    def setup(self):
        """Connect cqlengine once and apply the configured session options.

        Does nothing when a cqlengine cluster already exists. Otherwise the
        connection is set up from ``self.hosts``, ``self.keyspace`` and
        ``self.connection_options``, and every entry of
        ``self.session_options`` is set as an attribute on ``self.session``.
        """
        from cqlengine import connection
        if connection.cluster is not None:
            # already connected
            return
        connection.setup(self.hosts, self.keyspace, **self.connection_options)

        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for option, value in self.session_options.items():
            setattr(self.session, option, value)
    def setup(self):
        """Connect cqlengine once and apply the configured session options.

        Does nothing when a cqlengine cluster already exists. Otherwise the
        connection is set up from ``self.hosts``, ``self.keyspace`` and
        ``self.connection_options``, and every entry of
        ``self.session_options`` is set as an attribute on ``self.session``.
        """
        from cqlengine import connection
        if connection.cluster is not None:
            # already connected
            return
        connection.setup(self.hosts, self.keyspace, **self.connection_options)

        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for option, value in self.session_options.items():
            setattr(self.session, option, value)
Beispiel #16
0
def run():
    """Recreate the Stock table and insert the WPRO and INFY sample rows."""
    from cqlengine import connection

    connection.setup(['127.0.0.1'], "cqlengine")

    from cqlengine import management

    # Drop first so every run starts from an empty table.
    management.drop_table(Stock)
    management.sync_table(Stock)

    day = datetime.date

    Stock.create(
        name="WPRO",
        prices={
            day(2014, 12, 1): 200,
            day(2014, 12, 2): 220.45,
            day(2014, 12, 3): 250.67,
            day(2014, 12, 4): 246.86,
            day(2014, 12, 5): 201,
            day(2014, 12, 6): 233,
            day(2014, 12, 7): 245,
            day(2014, 12, 8): 300,
            day(2014, 12, 9): 307,
            day(2014, 12, 10): 180,
            day(2014, 12, 11): 405,
            day(2014, 12, 12): 400,
            day(2014, 12, 13): 670,
            day(2014, 12, 14): 260,
            day(2014, 12, 15): 250,
            day(2014, 12, 16): 251,
            day(2014, 12, 17): 254,
            day(2014, 12, 18): 267,
            day(2014, 12, 19): 270,
        },
        events={
            day(2014, 12, 13): "Something happened over here",
            day(2014, 12, 19): "The bears are playing",
        },
    )

    Stock.create(
        name="INFY",
        prices={
            day(2014, 8, 1): 3200,
            day(2014, 8, 2): 3220.45,
            day(2014, 8, 3): 3250.67,
            day(2014, 8, 4): 3246.86,
            day(2014, 8, 5): 3201,
            day(2014, 8, 6): 3233,
            day(2014, 8, 7): 3245,
            day(2014, 8, 8): 3300,
            day(2014, 8, 9): 3307,
            day(2014, 8, 10): 3180,
            day(2014, 8, 11): 3405,
            day(2014, 8, 12): 3400,
            day(2014, 8, 13): 3670,
            day(2014, 8, 14): 3260,
            day(2014, 8, 15): 3250,
            day(2014, 8, 16): 3251,
            day(2014, 8, 17): 3254,
            day(2014, 8, 18): 3267,
            day(2014, 8, 19): 3270,
        },
    )
    def setup(self):
        """Set up the cqlengine connection unless a cluster already exists.

        Extra keyword arguments come from the ``'connection'`` entry of
        ``self.options`` (empty when absent).
        """
        if connection.cluster is None:
            connection_kwargs = self.options.get('connection', {})
            connection.setup(self.hosts, self.keyspace, **connection_kwargs)
 def setUp(self):
     """Create a uniquely named keyspace and sync every model class into it."""
     # uuid1().hex is the UUID's string form without the dashes.
     self.keyspace = keyspace = 'testkeyspace{}'.format(uuid.uuid1().hex)
     clear()
     # Configure cqlengine's global connection pool.
     setup(['localhost'], default_keyspace=keyspace)
     create_keyspace(keyspace)
     for attr_name, model_cls in self.model_classes.items():
         # Expose the model on the test instance, then sync via that attribute.
         setattr(self, attr_name, model_cls)
         getattr(self, attr_name).sync_table()
    def setup(self):
        """Set up the cqlengine connection unless a cluster already exists.

        The consistency level is taken from ``self.options['consistency_level']``
        and falls back to ``ConsistencyLevel.ONE``.
        """
        if connection.cluster is None:
            level = self.options.get('consistency_level', ConsistencyLevel.ONE)
            connection.setup(self.hosts, self.keyspace, consistency=level)
Beispiel #20
0
def connect():
    """Connect to the local "meatbot" keyspace and sync all tables, once.

    The module-level ``connected`` flag makes repeated calls a no-op.
    """
    # Declared before first use: a ``global`` statement that follows a use of
    # the name is a SyntaxError on Python 3 (and a warning on Python 2).
    global connected
    if connected:
        return
    print("Connecting")
    setup(["localhost"], "meatbot")
    print("Connected")
    sync_table(User)
    sync_table(Project)
    sync_table(StatusUpdate)
    sync_table(StatusUpdateUserAggregated)
    print("Done Syncing")
    connected = True
Beispiel #21
0
def connect():
    """Connect to the local "meatbot" keyspace and sync all tables, once.

    The module-level ``connected`` flag makes repeated calls a no-op.
    """
    # Declared before first use: a ``global`` statement that follows a use of
    # the name is a SyntaxError on Python 3 (and a warning on Python 2).
    global connected
    if connected:
        return
    print("Connecting")
    setup(["localhost"], "meatbot")
    print("Connected")
    sync_table(User)
    sync_table(Project)
    sync_table(StatusUpdate)
    sync_table(StatusUpdateUserAggregated)
    print("Done Syncing")
    connected = True
Beispiel #22
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``userbase2`` table and insert every item of ``d_iter``.

    Each item is unpacked as keyword arguments to ``create``, so it must be a
    mapping keyed by the model's column names.
    """
    # Imports and the keyspace constant live at function scope: inside the
    # class body they were unavailable to the ``class`` statement's base list
    # and to the function-level calls below.
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class userbase2(Model):
        uid = columns.Integer(primary_key=True)
        reviewerID = columns.Text(primary_key=True)
        reviewerName = columns.Text()

    connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
    sync_table(userbase2)
    for d in d_iter:
        userbase2.create(**d)
Beispiel #23
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``predictions3`` table and insert one row per item of ``d_iter``."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class predictions3(Model):
        # Column declaration order is kept exactly as written.
        user = columns.Integer(primary_key=True)
        product = columns.Integer()
        rating = columns.Float(primary_key=True, clustering_order="DESC")

    seed_hosts = ['172.31.39.226']
    connection.setup(seed_hosts, CASSANDRA_KEYSPACE)
    sync_table(predictions3)
    for record in d_iter:
        predictions3.create(**record)
Beispiel #24
0
			def syncToCassandra(d_iter):
			    """Sync the ``recommendations9`` table and insert every item of ``d_iter``."""
			    from cqlengine import columns
			    from cqlengine.models import Model
			    from cqlengine import connection
			    from cqlengine.management import sync_table

			    CASSANDRA_KEYSPACE = "playground"
			    seed_hosts = ['172.31.39.226']
			    connection.setup(seed_hosts, CASSANDRA_KEYSPACE)

			    class recommendations9(Model):
			        uid = columns.Integer(primary_key=True)
			        mid = columns.Integer(primary_key=True)
			        rating = columns.Float()

			    sync_table(recommendations9)
			    for record in d_iter:
			        recommendations9.create(**record)
Beispiel #25
0
def AddToCassandra_stocktotalsbatch_bypartition(d_iter):
    """Sync the ``stock_totals_batch`` table and insert every item of ``d_iter``."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    class stock_totals_batch(Model):
        user = columns.Text(primary_key=True)
        portfolio_total = columns.Integer()

    # Cassandra seed node. TODO: do not hard code this.
    seed_node = "ec2-54-215-237-86.us-west-1.compute.amazonaws.com"
    connection.setup([seed_node], "finance_news")
    sync_table(stock_totals_batch)
    for record in d_iter:
        stock_totals_batch.create(**record)
def syncToCassandra(d_iter):
    """Sync the ``movieprofile9`` table and insert every item of ``d_iter``."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class movieprofile9(Model):
        mid = columns.Integer(primary_key=True)
        asin = columns.Text()
        title = columns.Text()
        imurl = columns.Text()

    seed_hosts = ['172.31.39.226']
    connection.setup(seed_hosts, CASSANDRA_KEYSPACE)
    sync_table(movieprofile9)
    for record in d_iter:
        movieprofile9.create(**record)
Beispiel #27
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``reviewerProfile`` table and insert every item of ``d_iter``."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class reviewerProfile(Model):
        # All three columns are part of the primary key; rating sorts descending.
        user = columns.Integer(primary_key=True)
        product = columns.Integer(primary_key=True)
        rating = columns.Float(primary_key=True, clustering_order="DESC")

    seed_hosts = ['172.31.39.226']
    connection.setup(seed_hosts, CASSANDRA_KEYSPACE)
    sync_table(reviewerProfile)
    for record in d_iter:
        reviewerProfile.create(**record)
Beispiel #28
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``reviewerProfile`` table and insert every item of ``d_iter``."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"
    seed_hosts = ['172.31.39.226']

    class reviewerProfile(Model):
        reviewerID = columns.Text(primary_key=True)
        # Text -> float map of reviews for this reviewer.
        reviews = columns.Map(columns.Text, columns.Float)

    connection.setup(seed_hosts, CASSANDRA_KEYSPACE)
    sync_table(reviewerProfile)
    for record in d_iter:
        reviewerProfile.create(**record)
Beispiel #29
0
def AddToCassandra_stocktotalsbatch_bypartition(d_iter):
    """Sync ``stock_totals_batch`` and write each item of ``d_iter`` as a row."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    class stock_totals_batch(Model):
        user = columns.Text(primary_key=True)
        portfolio_total = columns.Integer()

    # Cassandra seed node. TODO: do not hard code this.
    contact_points = ["ec2-54-215-237-86.us-west-1.compute.amazonaws.com"]
    connection.setup(contact_points, "finance_news")
    sync_table(stock_totals_batch)
    for row in d_iter:
        stock_totals_batch.create(**row)
Beispiel #30
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``userbase2`` table and insert every item of ``d_iter``.

    Each item is unpacked as keyword arguments to ``create``, so it must be a
    mapping keyed by the model's column names.
    """
    # Imports and the keyspace constant live at function scope: inside the
    # class body they were unavailable to the ``class`` statement's base list
    # and to the function-level calls below.
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class userbase2(Model):
        uid = columns.Integer(primary_key=True)
        reviewerID = columns.Text(primary_key=True)
        reviewerName = columns.Text()

    connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
    sync_table(userbase2)
    for d in d_iter:
        userbase2.create(**d)
def syncToCassandra(d_iter):
    """Sync the ``userprofile9`` table and insert every item of ``d_iter``."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class userprofile9(Model):
        uid = columns.Integer(primary_key=True)
        reviewerid = columns.Text()
        reviewername = columns.Text()
        numofreviews = columns.Float()
        ratings = columns.Map(columns.Text, columns.Float)

    seed_hosts = ['172.31.39.226']
    connection.setup(seed_hosts, CASSANDRA_KEYSPACE)
    sync_table(userprofile9)
    for record in d_iter:
        userprofile9.create(**record)
Beispiel #32
0
            def syncToCassandra(d_iter):
                """Sync ``recommendations9`` and write each item of ``d_iter`` as a row."""
                from cqlengine import columns
                from cqlengine.models import Model
                from cqlengine import connection
                from cqlengine.management import sync_table

                CASSANDRA_KEYSPACE = "playground"
                contact_points = ['172.31.39.226']
                # The connection is configured before the model class is declared.
                connection.setup(contact_points, CASSANDRA_KEYSPACE)

                class recommendations9(Model):
                    uid = columns.Integer(primary_key=True)
                    mid = columns.Integer(primary_key=True)
                    rating = columns.Float()

                sync_table(recommendations9)
                for row in d_iter:
                    recommendations9.create(**row)
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync ``reviewerProfile`` and write each item of ``d_iter`` as a row."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"
    contact_points = ["172.31.39.226"]

    class reviewerProfile(Model):
        reviewerID = columns.Text(primary_key=True)
        reviews = columns.Map(columns.Text, columns.Float)

    connection.setup(contact_points, CASSANDRA_KEYSPACE)
    sync_table(reviewerProfile)
    for row in d_iter:
        reviewerProfile.create(**row)
Beispiel #34
0
    def setup(self, force=False, throw=False):
        """Connect, create the keyspace and sync all registered models.

        Returns True when setup succeeded (or was already done and ``force``
        is false) and False when no Cassandra host was available. The return
        value is used for test skipping.

        NOTE(review): ``throw`` is accepted but never read in this method.
        """
        already_done = self._setup and not force
        if already_done:
            return True

        try:
            connection.setup(self.uri, self.keyspace)
            management.create_keyspace(self.keyspace,
                                       replication_factor=1,
                                       strategy_class='SimpleStrategy')
            for model_cls in self._models:
                # Point every model at this instance's keyspace before syncing.
                model_cls.__keyspace__ = self.keyspace
                management.sync_table(model_cls)
        except NoHostAvailable:
            logger.error('Could not connect to Cassandra, expect errors.')
            return False
        else:
            self._setup = True
            return True
Beispiel #35
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``movieCatalog2`` table and insert every item of ``d_iter``.

    Each item is unpacked as keyword arguments to ``create``, so it must be a
    mapping keyed by the model's column names.
    """
    # Imports and the keyspace constant live at function scope: inside the
    # class body they were unavailable to the ``class`` statement's base list
    # and to the function-level calls below.
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class movieCatalog2(Model):
        pid = columns.Integer(primary_key=True)
        asin = columns.Text(primary_key=True)
        brand = columns.Text()
        imUrl = columns.Text()
        price = columns.Float()
        title = columns.Text()

    connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
    sync_table(movieCatalog2)
    for d in d_iter:
        movieCatalog2.create(**d)
def syncToCassandra(d_iter):
    """Sync ``movieprofile9`` and write each item of ``d_iter`` as a row."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"
    contact_points = ['172.31.39.226']

    class movieprofile9(Model):
        mid = columns.Integer(primary_key=True)
        asin = columns.Text()
        title = columns.Text()
        imurl = columns.Text()

    connection.setup(contact_points, CASSANDRA_KEYSPACE)
    sync_table(movieprofile9)
    for row in d_iter:
        movieprofile9.create(**row)
Beispiel #37
0
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Sync the ``movieCatalog2`` table and insert every item of ``d_iter``.

    Each item is unpacked as keyword arguments to ``create``, so it must be a
    mapping keyed by the model's column names.
    """
    # Imports and the keyspace constant live at function scope: inside the
    # class body they were unavailable to the ``class`` statement's base list
    # and to the function-level calls below.
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class movieCatalog2(Model):
        pid = columns.Integer(primary_key=True)
        asin = columns.Text(primary_key=True)
        brand = columns.Text()
        imUrl = columns.Text()
        price = columns.Float()
        title = columns.Text()

    connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
    sync_table(movieCatalog2)
    for d in d_iter:
        movieCatalog2.create(**d)
Beispiel #38
0
def main():
    """Connect to the configured cluster and print the first 100 SavolModel rows.

    For each row the id is printed, then the title, question, answer, year,
    month and date between two separator lines.
    """
    setup(settings["CASSANDRA_CLUSTER"], settings["CASSANDRA_KEYSPACE"])
    # print() with a single argument behaves the same on Python 2 and 3.
    print("Gettings first 100 models from DB:")
    for model in SavolModel.objects.all()[:100]:
        print(model.savol_id)
        print("---------------------")
        print(model.title)
        print(model.question)
        print(model.answer)
        print(model.year)
        print(model.month)
        print(model.date)
        # A combined "{year}/{month}/{date}" line was previously considered
        # here instead of three separate prints.
        print("---------------------")
def syncToCassandra(d_iter):
    """Sync ``userprofile9`` and write each item of ``d_iter`` as a row."""
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"
    contact_points = ['172.31.39.226']

    class userprofile9(Model):
        uid = columns.Integer(primary_key=True)
        reviewerid = columns.Text()
        reviewername = columns.Text()
        numofreviews = columns.Float()
        # Text -> float map of ratings for this user.
        ratings = columns.Map(columns.Text, columns.Float)

    connection.setup(contact_points, CASSANDRA_KEYSPACE)
    sync_table(userprofile9)
    for row in d_iter:
        userprofile9.create(**row)
Beispiel #40
0
    def __init__(self, table_name) -> None:
        """Open a driver session and a cqlengine connection for ``table_name``.

        Creates the keyspace through ``self.create_keyspace``, selects it on
        the session, configures cqlengine with the same keyspace and finally
        calls ``self.create_samples_table``.
        """
        super().__init__()
        self.KEYSPACE = 'keyspace_name'
        self.TABLE_NAME = table_name
        # Only the 'all_stored_samples' table sets the clean flag.
        self.clean_table = self.TABLE_NAME == 'all_stored_samples'

        # For a local node, ['127.0.0.1'] can be used instead of 'cassandra_api'.
        cassandra_cluster = Cluster(['cassandra_api'], port=9042)
        self.session = cassandra_cluster.connect()
        self.session.row_factory = dict_factory
        self.create_keyspace(self.session)
        self.session.set_keyspace(self.KEYSPACE)
        setup(hosts=['cassandra_api'], default_keyspace=self.KEYSPACE)

        self.create_samples_table()
Beispiel #41
0
def tweets_to_cassandra(items):
    """Insert every item of ``items`` into the ``TweetModel`` table.

    Each item is unpacked as keyword arguments to ``TweetModel.create``
    (presumably a mapping with ``date`` and ``ticker`` keys; verify against
    the caller). The row count is printed afterwards.
    """
    from cqlengine import columns
    from cqlengine import connection
    from cqlengine.models import Model
    from cqlengine.management import sync_table
    from cqlengine.management import create_keyspace

    class TweetModel(Model):
        date = columns.Text(primary_key=True)
        ticker = columns.Text()

    connection.setup(['127.0.0.1'], "cqlengine")
    # NOTE(review): the second positional argument looks like it is meant to
    # be a replication strategy class name; "default_keyspace" is kept as in
    # the original, so confirm it against the installed cqlengine version.
    create_keyspace("cqlengine", "default_keyspace", 1)
    sync_table(TweetModel)
    for item in items:
        # Create through the model itself, one row per item. The previous
        # code called an undefined name and passed the whole collection.
        TweetModel.create(**item)

    print("Number of elements in table: %s" % TweetModel.objects.count())
Beispiel #42
0
def extract_queried_urls_ranks_links(res,url_total,ranks_total,links_total,links_listedPerURL):
    """Look up rank and links in Cassandra for every Elasticsearch hit.

    For each hit in ``res['hits']['hits']`` the hit's URL is looked up in
    ``url_ranks_4``. For every returned row with a non-null rank, the links
    are read from ``url_links_3`` and appended to the four output lists.

    Args:
        res: Elasticsearch response; each hit's ``_source`` must have a
            ``url`` key.
        url_total, ranks_total, links_total, links_listedPerURL: lists that
            are appended to in place.

    Returns:
        The tuple ``(url_total, ranks_total, links_total, links_listedPerURL)``
        of the same, mutated lists.
    """
    from cqlengine import connection
    from cassandra.cluster import Cluster

    CASSANDRA_KEYSPACE = "test"
    cassandra_hosts = ('52.88.228.98', '52.11.49.170')
    connection.setup(list(cassandra_hosts), CASSANDRA_KEYSPACE)
    cluster = Cluster(list(cassandra_hosts))
    try:
        session = cluster.connect(CASSANDRA_KEYSPACE)
        for hit in res['hits']['hits']:
            url = "%(url)s" % hit["_source"]
            # Bind the URL as a parameter: URLs may contain quotes or colons,
            # which break (or inject into) a string-concatenated statement.
            rank_rows = session.execute(
                "SELECT * FROM url_ranks_4 WHERE url=%s", (url,))
            for row in rank_rows:
                # row[0] = url, row[1] = rank. Skip rows without a rank.
                if row[1] is None:
                    continue

                # TODO: filter by page-rank value here and restrict to the top
                # 1000 to avoid crashing the EC2 micro instance.

                link_rows = session.execute(
                    "SELECT * FROM url_links_3 WHERE url=%s", (row[0],))
                for row_links in link_rows:
                    print("%s %s %s" % (row_links[0], row[1], len(row_links[1])))
                    url_total.append(row_links[0])
                    ranks_total.append(row[1])
                    links_total.append(row_links[1])
                    links_listedPerURL.append(row_links[1])
                    print("")
    finally:
        # Release the driver's connections even when a query fails.
        cluster.shutdown()
    # Uncertain whether callers need the return value; the lists are also
    # mutated in place.
    return (url_total, ranks_total, links_total, links_listedPerURL)
def test(d_iter):
    """Sync the ``table1_20150928`` table and insert every item of ``d_iter``.

    Each item is unpacked as keyword arguments to ``create``, so it must be a
    mapping keyed by the model's column names.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table
    from cqlengine.query import ModelQuerySet
    CASSANDRA_KEYSPACE = "playground"

    # The class body is indented with spaces only; mixing tabs and spaces here
    # is rejected by Python 3 as inconsistent indentation.
    class table1_20150928(Model):
        link_id = columns.Text(primary_key=True)
        comment_id = columns.Text(primary_key=True)
        source = columns.Text()
        title = columns.Text()
        permalink = columns.Text()
        subreddit = columns.Text()
        subreddit_id = columns.Text()
        selftext = columns.Text()
        created = columns.Text()
        score = columns.Text()
        url = columns.Text()

    connection.setup(['172.31.6.150'], CASSANDRA_KEYSPACE)
    sync_table(table1_20150928)
    for d in d_iter:
        table1_20150928.create(**d)
def AddToCassandra_allhuecountsbatch_bypartition(d_iter):
    """Sync ``allhuecountsbatch`` and write each item of ``d_iter`` as a row.

    NOTE(review): ``CASSANDRA_KEYSPACE`` is not defined in this function; it
    is presumably a module-level constant, so confirm that it exists.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    class allhuecountsbatch(Model):
        # Six-part primary key.
        granularity = columns.Text(primary_key=True)
        country = columns.Text(primary_key=True)
        region = columns.Text(primary_key=True)
        county = columns.Text(primary_key=True)
        locality = columns.Text(primary_key=True)
        datetaken = columns.Text(primary_key=True)
        # Non-key columns.
        count = columns.Integer()
        maxhueidxs = columns.List(columns.Integer())
        maxhue = columns.Float()
        huevalues = columns.List(columns.Integer())

    local_hosts = ['127.0.0.1']
    connection.setup(local_hosts, CASSANDRA_KEYSPACE)
    sync_table(allhuecountsbatch)
    for row in d_iter:
        allhuecountsbatch.create(**row)
Beispiel #45
0
def run_migrations_online():
    """Run migrations in 'online' mode against the global cqlengine connection.

    Hosts and keyspace are read from the ``cqlengine.hosts`` and
    ``cqlengine.keyspace`` keys of the config section. ``ConnectionProxy``
    is closed afterwards, even when a migration fails.
    """
    section = config.get_section(config.config_ini_section)
    hosts, keyspace = section['cqlengine.hosts'], section['cqlengine.keyspace']

    # cqlengine uses one global setup shared by every user of the module;
    # ConnectionProxy talks to that global connection.
    setup(hosts, default_keyspace=keyspace)
    context.configure(connection=ConnectionProxy,
                      target_metadata=target_metadata)

    try:
        with context.begin_transaction():
            context.run_migrations()
    finally:
        ConnectionProxy.close()
Beispiel #46
0
#first, define a model
from cqlengine import columns
from cqlengine.models import Model
import uuid

class ExampleModel(Model):
    """Example table keyed by a generated UUID, with an indexed type column."""
    read_repair_chance = 0.05  # optional - defaults to 0.1
    # uuid.uuid4 is passed as a callable so each row gets its own id.
    example_id = columns.UUID(primary_key=True, default=uuid.uuid4)
    example_type = columns.Integer(index=True)
    created_at = columns.DateTime()
    description = columns.Text(required=False)

# Next, set up the connection to your Cassandra server(s)...
# datetime is needed for the created_at values below.
from datetime import datetime

from cqlengine import connection
connection.setup(['localhost:9160'],
  username='******',
  password='******'
)

# ...and create your CQL table.
from cqlengine.management import sync_table
sync_table(ExampleModel)

# Now we can create some rows: four of type 0 and four of type 1.
em1 = ExampleModel.create(example_type=0, description="example1", created_at=datetime.now())
em2 = ExampleModel.create(example_type=0, description="example2", created_at=datetime.now())
em3 = ExampleModel.create(example_type=0, description="example3", created_at=datetime.now())
em4 = ExampleModel.create(example_type=0, description="example4", created_at=datetime.now())
em5 = ExampleModel.create(example_type=1, description="example5", created_at=datetime.now())
em6 = ExampleModel.create(example_type=1, description="example6", created_at=datetime.now())
em7 = ExampleModel.create(example_type=1, description="example7", created_at=datetime.now())
em8 = ExampleModel.create(example_type=1, description="example8", created_at=datetime.now())
Beispiel #47
0
    ticket_id = columns.Text(primary_key=True)
    # The callable itself is the default, so the timestamp is taken when a row
    # is created. Calling now() here would freeze a single value at class
    # definition time and reuse it for every row.
    created_dt = columns.DateTime(default=datetime.now)
    pg = columns.Text()
    duration = columns.Text()
    error_count = columns.Text()
    outage_caused = columns.Text()
    system_caused = columns.Text()
    ticket_type = columns.Text()
    row_create_ts = columns.DateTime(default=datetime.now)
    # NOTE(review): a string default on a DateTime column; confirm the column
    # accepts this format.
    row_end_ts = columns.DateTime(default='9999-12-31 00:00:00.00000-00')


# Earlier variant, kept for reference: connect to the "cqlengine" keyspace
# with protocol version 3 and sync the Tickets table.
#connection.setup(['127.0.0.1'], "cqlengine", protocol_version=3)
#sync_table(Tickets)
from cqlengine import connection
# Configure the cqlengine connection for the "sid" keyspace on localhost.
connection.setup(["localhost"], "sid")
# Table syncing is currently disabled; these are the models that would be synced.
# sync_table(Tickets)
# sync_table(Tickets_Division)
# sync_table(Tickets_Duration)
# sync_table(Tickets_Pg)
# sync_table(Tickets_Error_Count)
# sync_table(Tickets_Outage_Caused)
# sync_table(Tickets_System_Caused)
# sync_table(Tickets_Ticket_Type)
# sync_table(Tickets_Addt_Notes)

# A separate driver-level session on the same keyspace, in addition to the
# cqlengine connection above.
cluster = Cluster(['127.0.0.1'])
session = cluster.connect("sid")

# Example query against the session (disabled).
# result = session.execute("select * from tickets ")
# print dir(result)
Beispiel #48
0
class adsinfo(Model):
    """cqlengine model with one row per ``(ad_id, date_start)`` primary key.

    The remaining columns are float/integer metrics plus a text ``date_stop``.
    """

    # First primary-key column.
    ad_id = columns.BigInt(primary_key=True)
    actions_per_impression = columns.Float()
    clicks = columns.Float()
    cost_per_unique_click = columns.Float()
    cost_per_result = columns.Float()
    result_rate = columns.Float()
    reach = columns.Integer()
    # Second primary-key column; stored as text, not as a date type.
    date_start = columns.Text(primary_key=True)
    date_stop = columns.Text()


# Connect cqlengine to the cluster at 127.0.0.1, using the "fb_report" keyspace
connection.setup(['127.0.0.1'], "fb_report")

# Create the Spark context (app name "fb_report") and a SQL context on top of it
sc = SparkContext("spark://ip-172-31-9-43:7077", "fb_report")
sqlContext = SQLContext(sc)

# Read the JSON data from HDFS
path = "hdfs://ec2-52-8-165-110.us-west-1.compute.amazonaws.com:9000/user/AdReport/ads_info/history"
ad_camps = sqlContext.jsonFile(path)

# Sync the adsinfo model with its CQL table
sync_table(adsinfo)

# Register this SchemaRDD as a temporary table named "ad_camps".
ad_camps.registerTempTable("ad_camps")
Beispiel #49
0
#I am assuming that code placed here will run on startup?
from cassandra.cluster import Cluster
from cqlengine.connection import setup
from cqlengine.management import sync_table
from feed_engine.models import Photo, BlogPost, StatusUpdate, Video, User, Relationship, Comment

# print "Syncing tables ..."
#
from users.models import UserProfile

# Connect to the three-node cluster with "yookore" as the default keyspace.
setup(['192.168.10.200', '192.168.10.201', '192.168.10.202'], "yookore")

# Sync each model's table, one after another, in this fixed order.
for _model in (
    Photo,
    BlogPost,
    StatusUpdate,
    Video,
    User,
    Relationship,
    Comment,
    UserProfile,
):
    sync_table(_model)
Beispiel #50
0
def setup_connection():
    """Configure the cqlengine connection from the project settings.

    Hosts, consistency level and default keyspace come from the ``FEEDLY_*``
    settings; every entry of ``settings.CASSANDRA_DRIVER_KWARGS`` is forwarded
    to ``connection.setup`` as an extra keyword argument.
    """
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    consistency_level = settings.FEEDLY_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    driver_kwargs = settings.CASSANDRA_DRIVER_KWARGS
    connection.setup(
        hosts=hosts,
        consistency=consistency_level,
        default_keyspace=keyspace,
        **driver_kwargs
    )
Beispiel #51
0
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table


# define model for desired table
class userfollow(Model):
    """Row keyed by ``username`` with a list-of-text ``following`` column."""
    username = columns.Text(primary_key=True)
    following = columns.List(columns.Text)

    def __repr__(self):
        """Return ``"<username> <following>"`` for debugging output."""
        # ``following`` is a list column, so it is rendered with %s; the
        # integer format %d raises TypeError for a list.
        return '%s %s' % (self.username, self.following)


# set up the connection to Cassandra (keyspace "watch_events") and sync the table
connection.setup(['52.8.127.252', '52.8.41.216'], "watch_events")
sync_table(userfollow)

# timestamp taken at script start
start = time.time()

# access token, read from the "my_pass" environment variable
# (os.environ[...] raises KeyError if the variable is not set)
github_pass_alvin = os.environ['my_pass']

# base URL for per-user GitHub API requests
following_url = "https://api.github.com/users/"

# query-string fragment asking for 100 results per page
per_page = "&per_page=100"

# call github API and return following list
# return False if username doesn't exist
i = 0
Beispiel #52
0
# coding: utf-8

# In[2]:

from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table
# Keyspace used as the default for the cqlengine connection.
CASSANDRA_KEYSPACE = "playground"
connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
import json

# In[3]:

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
# Spark context with default configuration, plus a SQL context on top of it.
sc = SparkContext()
sqlContext = SQLContext(sc)
# Alternative input (the "_small" file), currently disabled:
#df = sqlContext.read.json("s3n://patricks3db/reviews_Movies_and_TV_small.json")
# Load the reviews JSON file and print the resulting schema.
df = sqlContext.read.json("s3n://patricks3db/reviews_Movies_and_TV.json")
df.printSchema()

# In[4]:

# Start from the full frame and drop the listed columns one at a time.
ratings = df
for _dropped_column in (
    "helpful",
    "reviewText",
    "reviewTime",
    "reviewerName",
    "summary",
    "unixReviewTime",
):
    ratings = ratings.drop(_dropped_column)
Beispiel #53
0
Cassandra manager
"""

from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection


"******************************************************************************************"
# Host list handed to connection.setup() further down.
cassandra_cluster_ip = ['127.0.0.1']
# Default keyspace handed to connection.setup() further down.
cassandra_keyspace = 'tsunami_project'
"******************************************************************************************"


# setup the connection to our cassandra server(s) and the default keyspace
connection.setup(cassandra_cluster_ip, cassandra_keyspace)


# mapper object with Cassandra model
class tsunami_table(Model):
    """Row keyed by ``code_gsm`` and ``timeslot`` with a list-of-integer ``phone`` column."""
    code_gsm = columns.Text(primary_key=True)
    timeslot = columns.Integer(primary_key=True)
    phone = columns.List(columns.Integer)


"""
Get the phone numbers to send them SMS alert for each code_gsm
"""
def get_phone_numbers(code_gsm, timeslot):
    phones_list = []
    try:
Beispiel #54
0
from gsmtpd import LMTPServer
from cqlengine import connection
from caliop.config import Configuration

# Load caliop.yaml under the name 'global' and connect cqlengine to the
# local node before the remaining caliop modules are imported.
# NOTE(review): the imports below are presumably placed after this setup
# because they rely on it at import time -- confirm before reordering.
Configuration.load('caliop.yaml', 'global')
connection.setup(['127.0.0.1:9160'])

from caliop.core.config import includeme
# Called with None in place of a configurator object.
includeme(None)

from caliop.helpers.log import log
from caliop.smtp.agent import DeliveryAgent


class LmtpServer(LMTPServer):
    """LMTP server that hands every incoming message to a ``DeliveryAgent``."""

    def process_message(self, peer, mailfrom, rcpttos, data):
        """Run one message through a fresh agent and log how many were delivered.

        ``peer`` is accepted but not used. Always returns ``None``.
        """
        delivered = DeliveryAgent().process(mailfrom, rcpttos, data)
        delivered_count = len(delivered)
        log.info('Deliver of %d messages' % delivered_count)
        return None


if __name__ == '__main__':
    # Bind to the loopback interface on port 4000 and serve requests.
    bind_address = ("127.0.0.1", 4000)
    server = LmtpServer(bind_address)
    server.serve_forever()
Beispiel #55
0
from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table
import os

# defining schema to write to cassandra table
# schema defined should exactly match the table created in cassandra
class getuserid(Model):
    """Row keyed by ``username`` with an integer ``userid`` column."""
    username = columns.Text(primary_key=True)
    userid = columns.Integer()

    def __repr__(self):
        """Return ``"<username> <userid>"`` for debugging output."""
        # %s rather than %d: %d raises TypeError when ``userid`` is None
        # (e.g. not set yet), while integer ids render identically.
        return '%s %s' % (self.username, self.userid)

# connect to the Cassandra keyspace "watch_events"
# NOTE(review): no sync_table call is visible in this part of the script;
# confirm the table is synced before rows are written.
connection.setup(['127.0.0.1'], "watch_events")

# master node's IP and public DNS, read from the environment; they are used
# to build the Spark master URL and the HDFS path
# (os.environ[...] raises KeyError if a variable is not set)
master_ip = os.environ['master_ip']
master_public_dns = os.environ['master_public_dns']

# create the SparkContext (app name "userid") and the SQLContext
sc = SparkContext("spark://" + master_ip + ":7077", "userid")
sqlContext = SQLContext(sc)

# read the JSON data for the collected usernames from HDFS
df = sqlContext.jsonFile("hdfs://" + master_public_dns + ":9000/camus/topics/github-usernames-good-1/hourly/2015/06/21/00/*")

# Spark job: keep only (login, id) from each record and collect the pairs
# on the driver
names = df.map(lambda x: (x.login, x.id)).collect()
Beispiel #56
0
import uuid
from cqlengine import columns
from cqlengine.models import Model


class ExampleModel(Model):
    """Example model with a generated UUID key and an indexed integer type."""
    read_repair_chance = 0.05  # optional - defaults to 0.1
    # Primary key; uuid.uuid4 is passed as a callable default.
    example_id = columns.UUID(primary_key=True, default=uuid.uuid4)
    # Declared with index=True.
    example_type = columns.Integer(index=True)
    created_at = columns.DateTime()
    # Explicitly optional.
    description = columns.Text(required=False)


# Next, set up the connection to your Cassandra server(s) and the default keyspace...
from cqlengine import connection
connection.setup(['127.0.0.1'], "cqlengine")

# ...or, if you're still on Cassandra 1.2:
#connection.setup(['127.0.0.1'], "cqlengine", protocol_version=1)

# Create your keyspace. This is, in general, not what you want in production;
# see https://cassandra.apache.org/doc/cql3/CQL.html#createKeyspaceStmt for options.
from cqlengine.management import create_keyspace
create_keyspace("cqlengine", "SimpleStrategy", 1)

# ...and create the CQL table for ExampleModel.
from cqlengine.management import sync_table
sync_table(ExampleModel)

#now we can create some rows:
em1 = ExampleModel.create(example_type=0,
Beispiel #57
0
import sys

sys.path.append("")

# setup cassandra
from cqlengine import connection

# Cassandra version under test, given as an integer (e.g. 12, 20 or 21).
try:
    CASSANDRA_VERSION = int(os.environ["CASSANDRA_VERSION"])
except (KeyError, ValueError):
    # KeyError: the variable is not set; ValueError: it is not an integer.
    # Anything else (e.g. KeyboardInterrupt) propagates without this hint.
    print(
        "CASSANDRA_VERSION must be set as an environment variable. One of (12, 20, 21)"
    )
    raise

# Host to test against; an unset or empty variable falls back to localhost.
CASSANDRA_TEST_HOST = os.environ.get('CASSANDRA_TEST_HOST') or 'localhost'

# Versions below 20 use native protocol 1, everything else protocol 2.
protocol_version = 1 if CASSANDRA_VERSION < 20 else 2

connection.setup([CASSANDRA_TEST_HOST],
                 protocol_version=protocol_version,
                 default_keyspace='cqlengine_test')

nose.main()
#Author: Filmon
#!/usr/bin/python
#This will push the outbound state table to cassandra

#import libraries
import os, sys
from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table

# Define a model
class outbound_state(Model):
    """Row keyed by ``c_state`` and ``c_year`` with a text ``c_count`` column."""
    c_state = columns.Text(primary_key=True)
    c_count = columns.Text()
    c_year = columns.Text(primary_key=True, clustering_order="DESC")

    def __repr__(self):
        """Return ``"<c_state> <c_year> <c_count>"``."""
        parts = (self.c_state, self.c_year, self.c_count)
        return '%s %s %s' % parts
# Connect to the "outbound_cassandra" keyspace and make sure the table exists.
connection.setup(['127.0.0.1'], "outbound_cassandra")
sync_table(outbound_state)

# Each stdin line is tab-separated: year, state, count (in that order).
for record in sys.stdin:
    fields = record.split('\t')
    year = str(fields[0].strip())
    state = str(fields[1].strip())
    count = str(fields[2].strip())
    outbound_state.create(c_year=year, c_state=state, c_count=count)

 def setup_cassandra(self):
     setup(hosts=['127.0.0.1'], default_keyspace=self.KEYSPACE)