def _connect(self, host, port, user, password):
     try:
         self.__connection = Accumulo(host, port, user, password)
         self.__log.debug('Connected to StaticFile Store')
     except Exception as e:
         self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e))
         raise Exception('Error while connecting to StaticFile Store: %s' % str(e))
Example #2
 def printTableDB(table):
     """ Displays the data in the database """
     conn = Accumulo(host="localhost",
                     port=50096,
                     user="******",
                     password="******")
     for entry in conn.scan(table):
         print(entry.row, entry.cf, entry.cq, entry.cv, entry.ts, entry.val)
     conn.close()
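
A minimal usage sketch of the function above, with a hypothetical table name and the same proxy settings:

printTableDB("video_frames")  # dumps every (row, cf, cq, cv, ts, val) entry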
Example #3
 def exportJsonDB(json_data, frameNum):
     """ Exports the JSON data to the Accumulo database """
     conn = Accumulo(host="localhost",
                     port=50096,
                     user="******",
                     password="******")
     json_data_parsed = json.loads(json_data)  # put the JSON data back into a dictionary
     # use the video name as the table name
     table = json_data_parsed['videoMetadata']['videoName']
     table = table.replace('.', '_')
     table = table.encode('ascii', 'ignore')
     if not conn.table_exists(table):
         conn.create_table(table)
     m = Mutation("row_%d" % frameNum)  # the table row number is the frame number
     # save the frame image separately from the metadata
     m.put(cf="cf2", cq="cq2", val=json_data_parsed['imageBase64'])
     if 'LabeledImage' in json_data_parsed:
         # save the labeled image separately from the metadata
         m.put(cf="cf3", cq="cq3", val=json_data_parsed['LabeledImage'])
         # drop the base64 representation of the labeled frame
         json_data_parsed.pop('LabeledImage', None)
     # drop the base64 representation of the frame
     json_data_parsed.pop('imageBase64', None)
     json_data = json.dumps(json_data_parsed)
     # the first column now holds only the metadata
     m.put(cf="cf1", cq="cq1", val=json_data)
     conn.write(table, m)
     conn.close()
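
A hedged usage sketch for exportJsonDB; the video name and base64 payload are placeholder values, and the proxy settings are assumed to match the example:

import json

sample = json.dumps({
    'videoMetadata': {'videoName': 'demo.mp4'},  # becomes table "demo_mp4"
    'imageBase64': '<base64-encoded frame>',     # stored under cf2/cq2
})
exportJsonDB(sample, frameNum=0)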
Example #4
def direct(config_path, namespace_string):
    with yakonfig.defaulted_config(
        [kvlayer], filename=config_path, params={"app_name": "kvlayer", "namespace": namespace_string}
    ):
        config = yakonfig.get_global_config("kvlayer")
        conn = Accumulo(
            host="test-accumulo-1.diffeo.com", port=50096, user=config["username"], password=config["password"]
        )

        yield conn

        tables = conn.list_tables()
        for table in tables:
            if re.search(namespace_string, table):
                conn.delete_table(table)
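
Since direct() yields its connection partway through, it is presumably consumed as a generator (or wrapped as a context manager) so that the table cleanup after the yield still runs; a sketch with placeholder arguments:

for conn in direct('/path/to/kvlayer.yaml', 'my_test_namespace'):
    print(conn.list_tables())  # tables matching the namespace are dropped afterwards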
Example #5
 def __connectToAccumulo(self, host, port, user, password):
     try:
         self.__dbConnection = Accumulo(host, port, user, password)
         self.__logger.debug('Successfully connected to CertStore')
     except Exception as ex:
         self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
         raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))
 def __connectToAccumulo(self, host, port, user, password):
     try:
         self.__logger.info("connecting to CertStore ...")
         self.__dbConnection = Accumulo(host, port, user, password)
         self.__logger.info("Successfully connected to CertStore")
     except Exception as ex:
         self.__logger.exception("Error in connecting to CertStore: %s" % str(ex))
         raise EzRPCertStoreException("Error in connecting to CertStore: %s" % str(ex))
 def __init__(self,
              host="localhost",
              port=42424,
              user="******",
              password="******",
              num_trials=100,
              filename='default_file.txt',
              seed=None,
              signer_ids=test_ids,
              pki=test_pki):
     self.conn = Accumulo(host=host,
                          port=port,
                          user=user,
                          password=password)
     self.num_trials = num_trials
     self.filename = filename
     self.seed = seed
     self.signer_ids = signer_ids
     self.pki = pki
Example #8
    def new(cls,
            elems,
            lbound,
            rbound,
            coin=BaseCoin(),
            conn_info=ConnInfo('localhost', 42424, 'root', 'secret'),
            table='__ADS_metadata___',
            elemclass=IntElem):
        """ Create a new skiplist that stores all of its data inside an
            Accumulo instance.

            Arguments:

            cls - the class implementing this class method
            elems - the elements to create the skiplist over
            lbound, rbound - the left and right boundary elements of the list
            coin - the source of randomness to use
                   (see pace.ads.skiplist.coin)
            conn_info - how to connect to the Accumulo instance being used
            table - the name of the table to store the ADS in
            elemclass - the class to use to store the elements in the skiplist
        """

        sl = cls(None, lbound, rbound, coin)

        if conn_info is not None:
            # For connecting to a live Accumulo instance
            host, port, user, password = conn_info
            conn = Accumulo(host=host, port=port,
                            user=user, password=password)
        else:
            # For testing/debug
            conn = FakeConnection()

        sl.conn = conn
        sl.table = table
        sl.elemclass = elemclass

        if not conn.table_exists(table):
            conn.create_table(table)

        right = cls.nodeclass.newnode(sl, None, None, rbound, True)
        left = cls.nodeclass.newnode(sl, None, right, lbound, True)

        sl.root = left

        for elem in elems:
            sl.insert(elem)

        return sl
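
A usage sketch under stated assumptions: SkipList stands in for a concrete subclass that defines nodeclass, and conn_info=None selects the FakeConnection test path instead of a live Accumulo proxy:

sl = SkipList.new(elems=[7, 3, 11],
                  lbound=0, rbound=100,
                  conn_info=None)  # FakeConnection; no live Accumulo needed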
Example #9
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
import settings

table = "pythontest"

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

print "Ingesting some data ..."
for num in range(1, 100):
    label = '%03d'%num
    mut = Mutation('r_%s'%label)
    mut.put(cf='cf_%s'%label, cq='cq1', val='value_%s'%label)
    mut.put(cf='cf_%s'%label, cq='cq2', val='value_%s'%label)
    wr.add_mutation(mut)
wr.close()
Example #10
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *

from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re
import settings
import sys

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

table = sys.argv[1]
if not conn.table_exists(table):
    print "Table '%s' does not exist."%table
    sys.exit(1)

search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

if len(search_terms) < 2:
    print "More than one term of length > 3 is required for this example"
    sys.exit(1)

for e in conn.batch_scan(table, iterators=[IndexedDocIterator(priority=21, terms=search_terms)]):
    print e.val
conn.close()
Example #11
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *

from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re

import settings
conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

table = "regexes"
if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

wr = conn.create_batch_writer(table)

license_file = "LICENSE"
linenum = 0

with open(license_file) as infile:
    for line in infile:
        linenum += 1
        
Example #12
def randtask(q,state,x):
  n=random.randint(0,x)
  entry=None
  for entry in conn.scan(table,scanrange=Range(srow=q,erow=q),cols=[[state]]):
    if n == 0:
      break
    else:
      n=n-1
  if entry is None:
    return None
  else:
    return entry.cq


conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)
table = settings.TABLE

if sys.argv[1] == "-c":
  print "create"
  wr = conn.create_batch_writer(table)
  i=0
  q="%s:%s"%(Q,sys.argv[2])
  mut = Mutation(q)
  for entry in conn.batch_scan(table,cols=[["Genome","md5"]],numthreads=10):
     genome=entry.row
     if i%1000 == 0:
       print entry.row
     mut.put(cf=QUEUED,cq=genome)
     i=i+1
  wr.add_mutation(mut)
Example #13
    for tok in tokens:
        m.put(tok, cq=uuid, val="")
        if len(m.updates) > 1000:
            writer.add_mutation(m)
            m = Mutation(shard)

    if len(m.updates) > 0:
        writer.add_mutation(m)

try:
    table = sys.argv[1]
    input_dirs = sys.argv[2:]
except:
    usage()

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

if not conn.table_exists(table):
    print "Creating table: %s"%table
    conn.create_table(table)

wr = conn.create_batch_writer(table)

for indir in input_dirs:
    for root, subFolders, files in os.walk(indir):
        for filename in files:
            filePath = os.path.join(root, filename)
            print "indexing file %s"%filePath
            uuid = get_uuid(filePath)
            with open( filePath, 'r' ) as f:
                write_mutations(wr, get_shard(uuid), uuid, filePath, get_tokens(f))
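
The helpers get_uuid(), get_shard(), and get_tokens() are not shown in this snippet; illustrative stand-ins, consistent with how they are called above but not the project's actual definitions, might look like:

import hashlib
import re

NUM_SHARDS = 8  # hypothetical shard count

def get_uuid(path):
    # a stable document id derived from the file path
    return hashlib.md5(path.encode('utf-8')).hexdigest()

def get_shard(uuid):
    # bucket documents into a fixed number of shard rows
    return "s%02d" % (int(uuid[:4], 16) % NUM_SHARDS)

def get_tokens(f):
    # lowercase word tokens from an open file handle
    return set(re.findall(r"\w+", f.read().lower()))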
Example #14
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *

from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re
import settings
import sys

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

table = sys.argv[1]
if not conn.table_exists(table):
    print "Table '%s' does not exist."%table
    sys.exit(1)

search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

if len(search_terms) < 2:
    print "More than one term of length > 3 is required for this example"
    sys.exit(1)

uuids = []
for e in conn.batch_scan(table, scanranges=[Range(srow="s", erow="t")], iterators=[IntersectingIterator(priority=21, terms=search_terms)]):
    uuids.append(e.cq)
Example #15
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo
from pyaccumulo.iterators import *

import settings
import sys

conn = Accumulo(host=settings.HOST,
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

table = sys.argv[1]
if not conn.table_exists(table):
    print("Table '%s' does not exist." % table)
    sys.exit(1)

search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

if len(search_terms) < 2:
    print("More than one term of length > 3 is required for this example")
    sys.exit(1)

for e in conn.batch_scan(
        table, iterators=[IndexedDocIterator(priority=21,
Example #16
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo
from pyaccumulo.objects import Range
from pyaccumulo.iterators import *

import settings
import sys

conn = Accumulo(host=settings.HOST,
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

table = sys.argv[1]
if not conn.table_exists(table):
    print("Table '%s' does not exist." % table)
    sys.exit(1)

search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

if len(search_terms) < 2:
    print("More than one term of length > 3 is required for this example")
    sys.exit(1)

uuids = []
for e in conn.batch_scan(
Example #17
class Benchmarker(object):

    BENCHMARKS = [(100, 10), (500, 50), (1000, 100), (5000, 500),
                  (10000, 1000)]

    FANCY_BENCHMARKS = [(2**i, 2**(i - 1)) for i in range(2, 14)]

    def __init__(self,
                 host="localhost",
                 port=42424,
                 user="******",
                 password="******",
                 num_trials=100,
                 filename='default_file.txt',
                 seed=None,
                 signer_ids=test_ids,
                 pki=test_pki):
        self.conn = Accumulo(host=host,
                             port=port,
                             user=user,
                             password=password)
        self.num_trials = num_trials
        self.filename = filename
        self.seed = seed
        self.signer_ids = signer_ids
        self.pki = pki

    def run_test(self,
                 table="test_table_5",
                 default_vis="default",
                 num_entries=100,
                 num_rows=15,
                 signClassName='RSASSA_PKCS1-v1_5',
                 write=True,
                 benchmark=False):
        """ Runs one iteration of the signature test. If benchmark is set to
            True, returns the lengths of time it took to sign all the entries
            and the time it took to verify all the entries.
        """

        table = sanitize(table)
        seed = self.seed

        if signClassName == 'ALL':
            for signClass in ALL_SIGNATURES:
                self.run_test(table + '_' + sanitize(signClass.name),
                              default_vis, num_entries, num_rows,
                              signClass.name, write, benchmark)
            return

        signClass = SIGNATURE_FUNCTIONS[signClassName]

        pubkey, privkey = signClass.test_keys()

        if write:
            signer = AccumuloSigner(privkey, sig_f=signClass)

            if not seed:
                # set a new seed if one wasn't specified
                seed = str(time.time())

            generate_data(self.filename,
                          seed,
                          default_vis=default_vis,
                          num_entries=num_entries,
                          num_rows=num_rows)

            sout = write_and_sign_data(self.filename, self.conn, table, signer,
                                       benchmark)

        vout = verify_data(self.conn, table, pubkey, benchmark)

        if benchmark:
            sign_start, sign_end = sout
            verif_success, verif_start, verif_end = vout

            print "Time taken to sign: %s" % str(sign_end - sign_start)
            print "Time taken to verify: %s" % str(verif_end - verif_start)

            return sign_end - sign_start, verif_end - verif_start

    def run_benchmarks(self,
                       table_prefix="benchmarking",
                       default_vis="default"):
        """ Benchmarks each different signature class on a variety of table
            sizes, measuring the time taken to sign & verify all entries of each
            table size with each signature algorithm.
        """

        table_prefix = sanitize(table_prefix)

        for entries, rows in self.BENCHMARKS:
            print "==============================================================="
            print "Current benchmark: %d entries over %d rows" % (entries,
                                                                  rows)
            print "==============================================================="
            print
            for signClass in SUPPORTED_SIGNATURES:
                table = "%s_%s_e%d_r%d" % (
                    table_prefix, sanitize(signClass.name), entries, rows)
                print "Benchmarking %s" % (sanitize(signClass.name))
                self.run_test(table,
                              default_vis,
                              entries,
                              rows,
                              signClass.name,
                              write=True,
                              benchmark=True)
                print

    def run_fancy_benchmarks(self,
                             table_prefix="benchmarking",
                             default_vis="default",
                             resfile="benchmark_results.csv"):
        """ Runs more benchmarks than run_benchmarks(), then writes the output
            to a file.
        """

        table_prefix = sanitize(table_prefix)

        results = []
        for entries, rows in self.FANCY_BENCHMARKS:
            print "==============================================================="
            print "Current benchmark: %d entries over %d rows" % (entries,
                                                                  rows)
            print "==============================================================="
            print
            classres = []
            for signClass in SUPPORTED_SIGNATURES:
                table = "%s_%s_e%d_r%d" % (
                    table_prefix, sanitize(signClass.name), entries, rows)
                print "Benchmarking %s" % (sanitize(signClass.name))
                sign_time, verif_time = self.run_test(table,
                                                      default_vis,
                                                      entries,
                                                      rows,
                                                      signClass.name,
                                                      write=True,
                                                      benchmark=True)
                classres.append((signClass.name, sign_time, verif_time))
                print
            results.append((entries, classres))

        print 'time to write to file'
        with open(resfile, 'w') as f:
            f.write('num entries,name,sign time,verification time\n')
            for num_entries, classres in results:
                for name, stime, vtime in classres:
                    f.write(','.join(
                        [str(num_entries), name,
                         str(stime),
                         str(vtime)]))
                    f.write('\n')
        print 'wrote to file'

    def full_benchmark(self,
                       table_prefix="full_benchmarking",
                       default_vis="default",
                       signClass=None,
                       num_entries=10000,
                       num_rows=1000):
        """ Either run a single benchmark (sign & verify) on one signature
            class, or run it with no signing class (just write & read) to get
            a baseline time.
        """

        table_prefix = sanitize(table_prefix)

        conn = self.conn

        if signClass:
            table = table_prefix + '_' + sanitize(signClass.name)
        else:
            table = table_prefix + '_baseline'

        if signClass:
            pubkey, privkey = signClass.test_keys()
            signer = AccumuloSigner(privkey, sig_f=signClass)
            start_time = time.clock()
            write_and_sign_data(self.filename,
                                conn,
                                table,
                                signer,
                                benchmark=False)
            end_time = time.clock()
            total_sign_time = end_time - start_time

            start_time = time.clock()
            verify_data(conn, table, pubkey, benchmark=False)
            end_time = time.clock()
            total_verif_time = end_time - start_time
        else:
            start_time = time.clock()
            write_data(self.filename, conn, table)
            end_time = time.clock()
            total_sign_time = end_time - start_time

            count = 0
            start_time = time.clock()
            for entry in conn.scan(table):
                count += 1
            end_time = time.clock()
            total_verif_time = end_time - start_time

        return (total_sign_time, total_verif_time)

    def run_full_benchmarks(self,
                            table_prefix="full_benchmarking",
                            default_vis="default",
                            num_entries=10000,
                            num_rows=1000,
                            outfile='full_benchmark_out.csv'):
        """ Benchmark each signing algorithm, writing the results to a file,
            and comparing them to a baseline write & read with no signatures.
        """

        table_prefix = sanitize(table_prefix)

        n = generate_data(self.filename,
                          self.seed,
                          default_vis=default_vis,
                          num_entries=num_entries,
                          num_rows=num_rows)

        base_write_time, base_read_time = self.full_benchmark(
            table_prefix, default_vis, None, num_entries, num_rows)

        with open(outfile, 'w') as f:
            bw = (base_write_time / n) * 1000
            br = (base_read_time / n) * 1000
            f.write(','.join(['name', 'signing time', 'verification time']))
            f.write('\n')
            f.write(','.join(['baseline', str(bw), str(br)]))
            f.write('\n')
            for signClass in SUPPORTED_SIGNATURES:
                (st, vt) = self.full_benchmark(table_prefix, default_vis,
                                               signClass, num_entries,
                                               num_rows)

                # convert seconds for the whole batch to milliseconds
                # per element
                st = (st / n) * 1000
                vt = (vt / n) * 1000

                f.write(','.join([signClass.name, str(st), str(vt)]))
                f.write('\n')

    def fastfail_benchmark(self, table):
        """ Check how long it takes just to read each element from a table,
            to see if there's a difference because of the changed visibility
            fields in signed tables.
        """

        table = sanitize(table)

        start = time.clock()

        total = 0
        for e in self.conn.scan(table):
            total += 1
        end = time.clock()

        return end - start

    def run_fastfail_benchmarks(self,
                                table_prefix="fastfail_benchmarking",
                                default_vis="default",
                                num_rows=1000,
                                num_noisy_entries=50000,
                                num_noisy_rows=1000,
                                outfile='fastfail_benchmark_out_2.csv',
                                num_trials=100,
                                one_vis=False):
        """ Benchmark to see how much overhead there is from the signature code
            making Accumulo unable to fast-fail and cache results from
            visibility field checks.

            If one_vis is False, it will randomly generate a default visibility
            value for each field. If it is a string, that string will be treated
            as the default visibility value for each 'noise' field.
        """

        table_prefix = sanitize(table_prefix)

        seed = self.seed
        noisy_filename = 'noisy_' + self.filename

        if not seed:
            # set a new seed if one wasn't specified
            seed = str(time.time())

        if one_vis:
            print 'generating noise with one visibility field'
            generate_data(noisy_filename,
                          seed,
                          vis=False,
                          default_vis=one_vis,
                          num_entries=num_noisy_entries,
                          num_rows=num_rows)
        else:
            print 'generating noise with random visibility fields'
            generate_data(noisy_filename,
                          seed,
                          vis=True,
                          num_entries=num_noisy_entries,
                          num_rows=num_rows)

        noisy_table = 'noisy_' + table_prefix

        write_data(noisy_filename, self.conn, noisy_table)

        for sc in SUPPORTED_SIGNATURES:
            pubkey, privkey = sc.test_keys()
            signer = AccumuloSigner(privkey, sig_f=sc)
            write_and_sign_data(noisy_filename, self.conn,
                                '_'.join([table_prefix,
                                          sanitize(sc.name)]), signer)

        all_times = []

        for n in [(num_noisy_entries / 10000) * (10**i) for i in range(6)]:

            print 'n:', n

            generate_data(self.filename,
                          str(time.time()),
                          default_vis=default_vis,
                          num_entries=n,
                          num_rows=min(n, num_rows))
            write_data(self.filename, self.conn, noisy_table)

            base_time = sum([
                self.fastfail_benchmark(noisy_table) for j in range(num_trials)
            ])
            times = []

            for signClass in SUPPORTED_SIGNATURES:

                pubkey, privkey = signClass.test_keys()
                signer = AccumuloSigner(privkey, sig_f=signClass)
                table = '_'.join([table_prefix, sanitize(signClass.name)])

                write_and_sign_data(self.filename, self.conn, table, signer)

                times.append((signClass.name,
                              sum([
                                  self.fastfail_benchmark(table)
                                  for j in range(num_trials)
                              ])))

            all_times.append((n, base_time, times))

        with open(outfile, 'w') as f:
            for num_elems, base_time, trials in all_times:

                print 'Trial for %d elements. Base time: %s' % (num_elems,
                                                                str(base_time))

                f.write('%d,BASE,%s\n' % (num_elems, str(base_time)))

                for name, ttime in trials:
                    print '\t%s: %s' % (name, str(ttime))
                    f.write('%d,%s,%s\n' % (num_elems, name, str(ttime)))
                print

    def id_test(self,
                table_prefix="id_test",
                default_vis="default",
                num_entries=10000,
                num_rows=1000):

        table_prefix = sanitize(table_prefix)

        generate_data(self.filename,
                      self.seed,
                      default_vis=default_vis,
                      num_entries=num_entries,
                      num_rows=num_rows)

        for signer_id, sigclass in self.signer_ids:

            _, privkey = sigclass.test_keys()
            table = table_prefix + '_' + sanitize(signer_id)

            signer = AccumuloSigner(privkey,
                                    sig_f=sigclass,
                                    signerID=signer_id)
            write_and_sign_data(self.filename, self.conn, table, signer)
            verify_data(self.conn, table, self.pki, sigclass)

    def table_test(self,
                   table_prefix="table_test1",
                   default_vis="default",
                   num_entries=10000,
                   num_rows=1000):

        table_prefix = sanitize(table_prefix)

        generate_data(self.filename,
                      self.seed,
                      default_vis=default_vis,
                      num_entries=num_entries,
                      num_rows=num_rows)

        for signer_id, sigclass in self.signer_ids:

            _, privkey = sigclass.test_keys()

            table = table_prefix + '_' + sanitize(signer_id)

            signer = AccumuloSigner(privkey, sig_f=sigclass)
            write_and_sign_data(self.filename,
                                self.conn,
                                table,
                                signer,
                                include_table=True)
            verif_key, _ = self.pki.get_verifying_key(signer_id)
            verify_data(self.conn, table, verif_key, False, include_table=True)

    def location_test(self,
                      cfg_file,
                      loc,  # label naming the config location under test
                      table_prefix="table_test1",
                      default_vis="default",
                      num_entries=10000,
                      num_rows=1000):

        table_prefix = sanitize(table_prefix) + '_' + sanitize(loc)

        generate_data(self.filename,
                      self.seed,
                      default_vis=default_vis,
                      num_entries=num_entries,
                      num_rows=num_rows)

        for signer_id, sigclass in self.signer_ids:

            _, privkey = sigclass.test_keys()

            table = table_prefix + '_' + sanitize(signer_id)

            conf = new_config(cfg_file, self.conn)

            signer = AccumuloSigner(privkey, sig_f=sigclass, conf=conf)
            write_and_sign_data(self.filename, self.conn, table, signer)
            verif_key, _ = self.pki.get_verifying_key(signer_id)
            verify_data(self.conn, table, verif_key, False, conf=conf)
Example #18
def main():
    parser = OptionParser()
    parser.add_option("-v",
                      '--verbose',
                      dest="verbose",
                      action="store_true",
                      default=False,
                      help="Verbose output")
    accumulo_group = OptionGroup(
        parser, 'Options that control the accumulo connection')
    accumulo_group.add_option('--host',
                              dest='host',
                              default='localhost',
                              help='Host for Accumulo. Default: localhost')
    accumulo_group.add_option('--user',
                              dest='user',
                              default='root',
                              help='User for Accumulo. Default: root')
    accumulo_group.add_option('--password',
                              dest='password',
                              default='secret',
                              help='Password for Accumulo user. Default: ...')
    accumulo_group.add_option('--port',
                              dest='port',
                              type='int',
                              default=42424,
                              help="Port for Accumulo. Default: 42424")
    parser.add_option_group(accumulo_group)

    output_group = OptionGroup(parser, 'Options that control output')
    output_group.add_option('--log-file',
                            dest='log_file',
                            default='output.log',
                            help='Output file for performance numbers')
    output_group.add_option('--table-prefix',
                            dest='table_prefix',
                            default='perf',
                            help='Prefix used for data tables')
    output_group.add_option('--profile',
                            dest='profile',
                            action='store_true',
                            default=False,
                            help="Profiles encryption code")
    output_group.add_option(
        '--cache_key',
        dest='cache_key',
        action='store_true',
        default=False,
        help='Keys are now cached during encryption and decryption')
    output_group.add_option(
        '--use_accumulo_keystore',
        dest='accumulo_keystore',
        action='store_true',
        default=False,
        help=
        "Keys are stored in Accumulo if option is included, otherwise they are stored locally"
    )
    parser.add_option_group(output_group)

    test_group = OptionGroup(parser,
                             "Options that control what tests are being run")
    test_group.add_option('--all',
                          dest='all',
                          action='store_true',
                          default=False,
                          help='Runs all the different tests')
    test_group.add_option(
        '--non-ceabac',
        dest='non_ceabac',
        action='store_true',
        default=False,
        help='Runs the non-CEABAC tests with a simple schema')
    test_group.add_option('--ceabac',
                          dest='ceabac',
                          action='store_true',
                          default=False,
                          help='Runs the CEABAC tests with a simple schema')
    test_group.add_option(
        '--vis-ceabac',
        dest='vis_ceabac',
        action='store_true',
        default=False,
        help='Runs CEABAC in CBC mode with varying visibility fields')
    test_group.add_option('--diff_schemas_ceabac',
                          dest='diff_ceabac',
                          action='store_true',
                          default=False,
                          help='Runs several different schemas for VIS_CBC')
    test_group.add_option('--diff_schemas_non_ceabac',
                          dest='diff_non_ceabac',
                          action='store_true',
                          default=False,
                          help='Runs several different schemas for AES_CBC')

    test_group.add_option(
        '--mixed_schemas',
        dest='mixed_schemas',
        action='store_true',
        default=False,
        help='Runs a set of schemas where the schemes are both CEABAC and not')
    parser.add_option_group(test_group)

    entries_group = OptionGroup(
        parser, "Options that control how many entries are run")
    entries_group.add_option('--num_entries',
                             dest='num_entries',
                             type='int',
                             default=1000,
                             help='Total number of cells being run')
    entries_group.add_option('--num_rows',
                             dest='num_rows',
                             type='int',
                             default=100,
                             help='Total number of rows being run')
    parser.add_option_group(entries_group)

    (cl_flags, _) = parser.parse_args()

    #set up logging
    if cl_flags.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(filename=cl_flags.log_file,
                        level=log_level,
                        format='%(levelname)s-%(asctime)s: %(message)s')

    logger = logging.getLogger("performance_testing")

    #check inputs
    if cl_flags.all and (cl_flags.non_ceabac or cl_flags.ceabac
                         or cl_flags.vis_ceabac):
        logger.error(
            '--all is already specified, do not need to define other tests to run'
        )

    #create accumulo connection
    conn = Accumulo(host=cl_flags.host,
                    port=cl_flags.port,
                    user=cl_flags.user,
                    password=cl_flags.password)

    #create benchmarker
    if cl_flags.cache_key:
        logger.info('Using the caching version of the pki')
        pki = DummyCachingEncryptionPKI(
            conn=conn if cl_flags.accumulo_keystore else None)
    else:
        pki = DummyEncryptionPKI(
            conn=conn if cl_flags.accumulo_keystore else None)

    benchmarker = Benchmarker(logger=logger, pki=pki, conn=conn)

    if cl_flags.all:
        run_non_ceabac(benchmarker, cl_flags.table_prefix, logger,
                       cl_flags.profile, cl_flags)
        run_ceabac(benchmarker, cl_flags.table_prefix, logger,
                   cl_flags.profile, cl_flags)
        run_vis_ceabac(benchmarker, cl_flags.table_prefix, logger,
                       cl_flags.profile, cl_flags)
        run_diff_ceabac(benchmarker, cl_flags.table_prefix, logger,
                        cl_flags.profile, cl_flags)
        run_diff_non_ceabac(benchmarker, cl_flags.table_prefix, logger,
                            cl_flags.profile, cl_flags)
        run_mixed_schemas(benchmarker, cl_flags.table_prefix, logger,
                          cl_flags.profile, cl_flags)

    if cl_flags.non_ceabac:
        run_non_ceabac(benchmarker, cl_flags.table_prefix, logger,
                       cl_flags.profile, cl_flags)

    if cl_flags.ceabac:
        run_ceabac(benchmarker, cl_flags.table_prefix, logger,
                   cl_flags.profile, cl_flags)

    if cl_flags.vis_ceabac:
        run_vis_ceabac(benchmarker, cl_flags.table_prefix, logger,
                       cl_flags.profile, cl_flags)

    if cl_flags.diff_ceabac:
        run_diff_ceabac(benchmarker, cl_flags.table_prefix, logger,
                        cl_flags.profile, cl_flags)

    if cl_flags.diff_non_ceabac:
        run_diff_non_ceabac(benchmarker, cl_flags.table_prefix, logger,
                            cl_flags.profile, cl_flags)

    if cl_flags.mixed_schemas:
        run_mixed_schemas(benchmarker, cl_flags.table_prefix, logger,
                          cl_flags.profile, cl_flags)
class EzRPStaticStore(object):

    '''
    Class to save and retrieve static content from Accumulo.
    cf = "static"                   For all rows
    cq = "hash"                     Stores the hash_value of Static File
    cq = "nofchunks"                Stores the number of Chunks needed to store Static File
    cq = "chunk_000" .. "chunk_nnn" Stores the Chunks of Static File
    '''
    def __init__(self, host="localhost", port=42424, user='******', password='******', chunk_size=int(5*1048576), logger=None):
        self.__host = host
        self.__port = port
        self.__user = user
        self.__password = password
        self.__table = 'ezfrontend'
        self.__cf = 'static'
        self.__connection = None

        if logger is not None:
            self.__log = logger
        else:
            self.__log = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__log.addHandler(logging.NullHandler())

        self.__chunk_size = int(chunk_size)
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def _connect(self, host, port, user, password):
        try:
            self.__connection = Accumulo(host, port, user, password)
            self.__log.debug('Connected to StaticFile Store')
        except Exception as e:
            self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e))
            raise Exception('Error while connecting to StaticFile Store: %s' % str(e))

    def _ensureTableExists(self):
        '''
        Make sure that the table exists before any other operation.
        Reconnect to Accumulo if the Connection is reset.
        '''
        if not self.__connection.table_exists(self.__table):
            self.__log.info('table "{table}" does not exist in StaticFile Store. Creating the table'.format(table=self.__table))
            self.__connection.create_table(self.__table)
            if not self.__connection.table_exists(self.__table):
                self.__log.error('Unable to ensure StaticFile Store table "{table}" exists'.format(table=self.__table))
                raise Exception('StaticFile Store:  Unable to ensure table "{table}" exists'.format(table=self.__table))

    def _ensureNoDuplicates(self, usrFacingUrlPrefix):
        '''
        Ensure only a single copy of the file exists for a given usrFacingUrlPrefix
        '''
        if self._getHash(usrFacingUrlPrefix) is not None:
            self.deleteFile(usrFacingUrlPrefix)

    def _putNofChunks(self, usrFacingUrlPrefix, length):
        '''
        Store the number of chunks used to hold the static content
        '''

        chunks = int(math.ceil(length / float(self.__chunk_size)))
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="nofchunks", val=str(chunks))
        writer.add_mutation(m)
        writer.close()

    def _getNofChunks(self, usrFacingUrlPrefix):
        '''
        Get the number of chunks used to store the static content
        '''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="nofchunks",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="nofchunks")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return int(entry.val)
        return 0

    def _getChunks(self, data):
        '''
        Break the blob into chunks of at most chunk_size bytes,
        kept below maxFrameSize in the Accumulo proxy.properties.
        '''
        data_length = len(data)
        for i in range(0, data_length, self.__chunk_size):
            yield data[i:i + self.__chunk_size]

    def _putHash(self, usrFacingUrlPrefix, hash_str):
        '''
        Puts the Hash for usrFacingUrlPrefix
        '''
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", val=hash_str)
        writer.add_mutation(m)
        writer.close()

    def _getHash(self, usrFacingUrlPrefix):
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="hash",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="hash")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return str(entry.val)
        return None

    def reConnection(self):
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def putFile(self, usrFacingUrlPrefix, hash_str, data):
        self._ensureTableExists()
        self._ensureNoDuplicates(usrFacingUrlPrefix)
        self._putHash(usrFacingUrlPrefix, hash_str)
        data_length = len(data)
        self._putNofChunks(usrFacingUrlPrefix, data_length)
        writer = self.__connection.create_batch_writer(self.__table)
        for i, chunk in enumerate(self._getChunks(data)):
            m = Mutation(usrFacingUrlPrefix)
            m.put(cf=self.__cf, cq="chunk_{number:010d}".format(number=i), val=chunk)
            writer.add_mutation(m)
        self.__log.debug('added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length))
        writer.close()

    def getFile(self, usrFacingUrlPrefix):
        '''
        Assembles all the chunks for this row
        '''
        self._ensureTableExists()
        data = array.array('c') # Create a byte array
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        chunks_read = 0
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, cq]]):
                if entry.row == usrFacingUrlPrefix and entry.cq.startswith("chunk_"):
                    chunks_read += 1
                    data.extend(entry.val)

        # This code gets following error while retrieving over 96MB.  Data stops at first chunk_000
        # # java.lang.OutOfMemoryError: Java heap space
        # -XX:OnOutOfMemoryError="kill -9 %p"
        #   Executing /bin/sh -c "kill -9 32597"...
        # [1]+  Exit 137  sudo -u accumulo /opt/accumulo/current/bin/accumulo proxy -p /opt/accumulo/current/conf/proxy.properties

        # startChunk = "chunk_{number:010d}".format(number=0)
        # endChunk = "chunk_{number:010d}".format(number=chunks)
        # scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq=startChunk,
        #                    erow=usrFacingUrlPrefix, ecf=self.__cf, ecq=endChunk)
        # for entry in self.__connection.scan(self.__table, scanrange=scan_range):
        #     #self.__log.info("getFile: row = {0} cq= {1}".format(entry.row, entry.cq))
        #     if entry.cq.startswith("chunk_"):
        #         self.__log.info("getFile: row = {0} cq= {1}".format(entry.row, entry.cq))
        #         chunks_read += 1
        #         data.extend(entry.val)
        self.__log.debug('retrieved static file for {url}'.format(url=usrFacingUrlPrefix))
        if chunks_read != chunks:
            self.__log.error("did not read all the chunks from StaticFile Store")
        return data.tostring() if data.buffer_info()[1] > 0 else None

    def deleteFile(self, usrFacingUrlPrefix):
        self._ensureTableExists()
        writer = self.__connection.create_batch_writer(self.__table)
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", is_delete=True)
        m.put(cf=self.__cf, cq="nofchunks", is_delete=True)
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            m.put(cf=self.__cf, cq=cq, is_delete=True)
        writer.add_mutation(m)
        self.__log.debug('removed static file for {url}'.format(url=usrFacingUrlPrefix))
        writer.close()

    def getAttributes(self):
        '''
        Yields the urlprefix and hash of every entry in the table as a tuple,
        or a single (None, None) if the table is empty.
        '''
        self._ensureTableExists()
        found = False
        for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, "hash"]]):
            found = True
            yield (entry.row, str(entry.val))
        if not found:
            yield (None, None)
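
A minimal usage sketch for EzRPStaticStore, assuming a local Accumulo proxy and placeholder credentials, URL, and content:

store = EzRPStaticStore(host='localhost', port=42424,
                        user='root', password='secret')
store.putFile('/static/logo.png', hash_str='<sha1-of-contents>', data='<file bytes>')
print(store.getFile('/static/logo.png'))   # reassembles the stored chunks
store.deleteFile('/static/logo.png')
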
 def connect(self):
     self._conn = Accumulo(host=self._host, port=self._port, user=self._user, password=self._password)        
class DBConnection:
    """ The interface to Accumulo.
    """

    __slots__ = ['_host','_port','_user','_password','_conn']
    
    def __init__(self, host, port, user, password):
        self._host = host
        self._port = port
        self._user = user
        self._password = password

    def connect(self):
        self._conn = Accumulo(host=self._host, port=self._port, user=self._user, password=self._password)        
 
    def query(self, plenario_session_state):
        dataset = plenario_session_state.get_dataset()
        if dataset != "dat_master":
            raise Exception("Querying currently only supported on dat_master!")
            
        ngon = plenario_session_state.get_ngon()
        if ngon is None:
            raise Exception("You must have an N-gon selected!")
        if len(ngon) != 5:
            raise Exception("Querying currently only supported for a 5-gon!")
        
        start_date = plenario_session_state.get_start_date() 
        end_date = plenario_session_state.get_end_date() 
        date_aggr = plenario_session_state.get_date_aggr()
    
        p0 = ngon[0]
        p1 = ngon[1]
        p2 = ngon[2]
        p3 = ngon[3]
        p4 = ngon[4]

        p0 = (41.88, -87.64)
        p1 = (41.89, -87.64)
        p2 = (41.89, -87.63 )
        p3 = (41.88, -87.63)
        p4 = (41.88, -87.635)         

        min_gh = geohash.encode(-89.9,-179.9)
        max_gh = geohash.encode(89.9,179.9)

        cells = []
        for cell in self._conn.batch_scan("dat_master", numthreads=10, scanranges=[Range(srow=min_gh, erow=max_gh)]):
            cells.append(cell)
    
        # Grouping key-value pairs that belong to same entry
        rows = {}
        for cell in cells:
            if cell.cq not in rows:
                rows[cell.cq] = {}
                rows[cell.cq]['ghash'] = cell.row
                rows[cell.cq][cell.cf] = cell.val
            rows[cell.cq][cell.cf] = cell.val

        # Filter out those that are not in the temporal range
        start_date = datetime.strptime(start_date, "%m/%d/%Y")
        end_date = datetime.strptime(end_date, "%m/%d/%Y")

        rows_filtered = {}

        for key, val in rows.iteritems():
            obs_date = datetime.strptime(val['obs_date'], "%Y-%m-%d %H:%M:%S")
            if start_date <= obs_date and obs_date <= end_date:
                rows_filtered[key] = val

        # Filter out those that are not in the spatial range, i.e. within the polygon
        rows = rows_filtered
        rows_filtered = {}       

        #poly = Polygon([p0,p1,p2,p3,p4])
        poly = Polygon([(-90,-180),(90,-180),(90,180),(-90,180)])

        for key, val in rows.iteritems():
            ghash = val['ghash']
            pt = Point(geohash.decode(ghash))
            if poly.contains(pt):
                rows_filtered[key] = val 
    
        # Truncate date as specified by date_aggr and count group sizes
        rows = rows_filtered
        rows_filtered = {}

        for key, val in rows.iteritems():
            date = truncate(datetime.strptime(val['obs_date'], "%Y-%m-%d %H:%M:%S"),date_aggr)
            tup = (val['dataset_name'],date.isoformat())
            if tup not in rows_filtered:
                rows_filtered[tup] = 1
            else:
                rows_filtered[tup] = rows_filtered[tup]+1

        return rows_filtered
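
A hedged usage sketch; plenario_session_state stands for any object exposing the get_dataset(), get_ngon(), and date accessors that query() expects:

db = DBConnection('localhost', 42424, 'root', 'secret')
db.connect()
counts = db.query(plenario_session_state)  # {(dataset_name, iso_date): count}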
Example #22
class EzRPCertStore(object):
    """
    Wrapper class to underlying database store which hold server certs for reverse proxy
    """

    def __init__(self, host='localhost', port=42424, user='******', password='******', table='ezfrontend', privateKey=None, logger=None):
        self.__table = table
        self.__signer = None
        self.__dbConnection = None
        self.__cf = "pfx"
        self.__cq = "enc"

        if logger is not None:
            self.__logger = logger
        else:
            self.__logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__logger.addHandler(logging.NullHandler())
        
        if privateKey is not None:
            self.__updateSigner(privateKey)

        self.__connectToAccumulo(host, port, user, password)


    def __connectToAccumulo(self, host, port, user, password):
        try:
            self.__dbConnection = Accumulo(host, port, user, password)
            self.__logger.debug('Successfully connected to CertStore')
        except Exception as ex:
            self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
            raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))


    def __updateSigner(self, privateKey):
        with open(privateKey) as key_file:
            self.__signer = PKCS1_v1_5.new(RSA.importKey(key_file.read()))
            self.__logger.info('Updated signer for CertStore')


    def __ensureTable(self):
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.info('DB table %s doesn\'t exist in the Store. Creating ...' % self.__table)
            self.__dbConnection.create_table(self.__table)
            if not self.__dbConnection.table_exists(self.__table):
                self.__logger.error('Unable to ensure DB table exists in the Store.')
                raise EzRPCertStoreException('CertStore: Unable to ensure DB table exists in the Store.')


    def _generatePassword(self, serverName):
        password = '******' #salt
        
        if self.__signer is None:
            password = base64.b64encode(password + serverName)
        else:
            digest = SHA256.new(password + serverName)
            signature = self.__signer.sign(digest)
            password = base64.b64encode(signature)

        return password


    def _generatePkcs12(self, serverName, certContents, keyContents, password=None):
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, keyContents)
        cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, certContents)
        
        pfx = OpenSSL.crypto.PKCS12()
        pfx.set_certificate(cert)
        pfx.set_privatekey(key)
        
        return pfx.export(passphrase=password)


    def _retrieveCertAndKey(self, pfx, serverName, password=None):
        p12 = OpenSSL.crypto.load_pkcs12(pfx, password)
        keycontents = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey())
        certContents = OpenSSL.crypto.dump_certificate( OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate())
        return certContents, keycontents


    def put(self, serverName, certContents, keyContents):
        self.__ensureTable()

        writer = self.__dbConnection.create_batch_writer(self.__table)
        value = self._generatePkcs12(serverName, certContents, keyContents, self._generatePassword(serverName))

        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, val=value)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('added cert/key contents for %s to store' % serverName)


    def get(self, serverName):
        self.__ensureTable()
        
        for entry in self.__dbConnection.scan(self.__table, cols=[[self.__cf, self.__cq]]):
            if entry.row == serverName:
                self.__logger.debug('retrieved cert/key for %s from store' % serverName)
                return self._retrieveCertAndKey(entry.val, serverName, self._generatePassword(serverName))
        return None, None


    def remove(self, serverName):
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, is_delete=True)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('removed cert/key for %s from store' % serverName)


    def exists(self, serverName):
        self.__ensureTable()
        
        # use a single-row range to narrow our scan
        scan_range = Range(srow=serverName, scf=self.__cf, scq=self.__cq,
                           erow=serverName, ecf=self.__cf, ecq=self.__cq)

        for entry in self.__dbConnection.scan(self.__table, scanrange=scan_range):
            if entry.row == serverName:
                self.__logger.debug('cert/key for %s exists in store' % serverName)
                return True
        self.__logger.debug('cert/key for %s DOES NOT exist in store' % serverName)
        return False
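
A usage sketch for EzRPCertStore with placeholder names; cert_pem and key_pem are assumed to hold PEM-encoded certificate and key text:

store = EzRPCertStore(host='localhost', port=42424,
                      user='root', password='secret',
                      privateKey='/path/to/signer_key.pem')
store.put('proxy.example.com', cert_pem, key_pem)
if store.exists('proxy.example.com'):
    cert, key = store.get('proxy.example.com')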
Example #23
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo
from pyaccumulo.objects import Mutation
from pyaccumulo.iterators import *

import settings
conn = Accumulo(host=settings.HOST,
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

table = "regexes"
if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

wr = conn.create_batch_writer(table)

license_file = "LICENSE"
linenum = 0

with open(license_file) as infile:
    for line in infile:
        linenum += 1
Example #24
import pandas as pd
import numpy as np
import geohash
import datetime as dt
import random
import time
from shapely.geometry import Polygon
from shapely.geometry import Point


# Import Accumulo
from pyaccumulo import Accumulo, Mutation, Range

select_data = pd.read_csv("/home/ubuntu/select_data.csv")

# Connecting to Accumulo
conn = Accumulo(host="172.31.3.218",port=42424,user="******",password="******")

table = "Plenario_data"
conn.create_table(table)
# Writing Mutation
wr = conn.create_batch_writer(table)

for num in range(select_data.shape[0]):
    if (num%100000==0):
        print num
    m = Mutation(str(select_data.get_value(num,"Geohash")))
    # A mutation is an object that represents a row in the Accumulo Table
    m.put(cf=str(select_data.get_value(num,"Formated_date")), val=select_data.get_value(num,"Descript"))
#     m.put(cf="cf2", val="%d"%num)
    # Adding the row to the table
    wr.add_mutation(m)
Example #25
from shapely.geometry import Point
from simulate_try import simulate_try


def geohash_min_max(polygon):
    x1, y1, x2, y2 = polygon.bounds
    corners = [geohash.encode(x1, y1), geohash.encode(x1, y2),
               geohash.encode(x2, y1), geohash.encode(x2, y2)]
    return min(corners), max(corners)
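
A quick sketch of the helper above, with a placeholder polygon; the min/max geohashes bound an Accumulo scan range:

from shapely.geometry import Polygon
from pyaccumulo import Range
import geohash

poly = Polygon([(41.88, -87.64), (41.89, -87.64), (41.89, -87.63)])
min_gh, max_gh = geohash_min_max(poly)
scan_range = Range(srow=min_gh, erow=max_gh)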

# Import Accumulo
from pyaccumulo import Accumulo, Mutation, Range

# Connecting to Accumulo
conn = Accumulo(host="172.31.3.218",port=42424,user="******",password="******")
table = "Plenario_data"

# poly = Polygon([(37.795542, -122.423058), 
#                (37.800019, -122.398853), 
#                (37.789302, -122.38821), 
#                (37.7737, -122.39542), 
#                (37.770036, -122.417736)])
now_latlong_1 = "37.795542, -122.423058"
now_latlong_2 = "37.800019, -122.398853"
now_latlong_3 = "37.789302, -122.38821"
now_latlong_4 = "37.78121, -122.39212"
now_latlong_5 = "37.770036, -122.417736"

for i in range(0,20):
    poly = Polygon(simulate_try())
Example #26
"""
Reads lines from a data file and inserts a number of records
"""

from pyaccumulo import Accumulo, Mutation, Range
import settings
import sys
sys.path
sys.path.append('/bdsetup')

table = "well_logs"
table1 = "drill_logs"
conn = Accumulo(host=settings.HOST,
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

print "Ingesting some data ..."
f = open("/bdsetup/acculog.txt", "rb")
for i in range(250):
    line = f.readline().rstrip()
    label = '%04d' % i
    mut = Mutation('r_%s' % label)
    mut.put(cq='cq1', val=line)
    #mut.put(cf='cf_%s'%label, cq='cq1', val=line)
Example #27
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *
import settings

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

table = "analytics"

if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

summing = SummingCombiner(priority=10)
summing.add_column("sum")
summing.add_column("count")
summing.attach(conn, table)

sumarray = SummingArrayCombiner(priority=11)
sumarray.add_column("histo")
sumarray.attach(conn, table)
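
The combiners only take effect as entries are written; a hedged sketch of feeding them, reusing the column names registered above (row and values are placeholders, and the SummingCombiner is assumed to use its default string encoding):

wr = conn.create_batch_writer(table)
for v in [3, 5, 9]:
    m = Mutation("metric_01")
    m.put(cf="sum", cq="cq", val=str(v))   # summed server-side
    m.put(cf="count", cq="cq", val="1")    # one per contribution
    wr.add_mutation(m)
wr.close()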