Exemple #1
0
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

# Usage: <script> <table> <term> <term> [<term> ...]
#
# Scans rows "s".."t" of <table> with an IntersectingIterator built from the
# search terms, collects the column qualifiers of the hits, then scans each of
# those qualifiers as a row key and prints the stored value.
try:
    if len(sys.argv) < 2:
        print("Usage: %s <table> <term> <term> [<term> ...]" % sys.argv[0])
        sys.exit(1)

    table = sys.argv[1]
    if not conn.table_exists(table):
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms of three characters or fewer are ignored.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    uuids = [
        e.cq
        for e in conn.batch_scan(
            table,
            scanranges=[Range(srow="s", erow="t")],
            iterators=[IntersectingIterator(priority=21, terms=search_terms)])
    ]

    if uuids:
        for doc in conn.batch_scan(
                table, scanranges=[Range(srow=uuid, erow=uuid) for uuid in uuids]):
            print(doc.val)
    else:
        print("No results found")
finally:
    # Runs on every exit path, including the sys.exit() calls above, so the
    # connection is never left open.
    conn.close()
Exemple #2
0
  if entry is None:
    return None
  else:
    return entry.cq


conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)
table = settings.TABLE

# "-c <name>": build a single mutation on row "<Q>:<name>" that gets one
# QUEUED column per entry returned by the Genome:md5 scan, then write it.
if sys.argv[1] == "-c":
  print("create")
  wr = conn.create_batch_writer(table)
  q = "%s:%s" % (Q, sys.argv[2])
  mut = Mutation(q)
  try:
    for i, entry in enumerate(conn.batch_scan(table, cols=[["Genome", "md5"]], numthreads=10)):
      genome = entry.row
      if i % 1000 == 0:
        # Progress marker: print every 1000th row key.
        print(entry.row)
      mut.put(cf=QUEUED, cq=genome)
    wr.add_mutation(mut)
  finally:
    # Close the writer even if the scan fails part-way through.
    wr.close()
  sys.exit()

# "-r <name>": recover mode for the queue row "<Q>:<name>".
# NOTE(review): this excerpt ends inside the while loop. As shown, `genome` is
# never reassigned, so the remainder of the loop body presumably follows
# outside the visible text -- confirm against the full script.
if sys.argv[1] == "-r":
  print "recover"
  q="%s:%s"%(Q,sys.argv[2])
  # randtask is defined outside this excerpt; its semantics are not visible here.
  genome=randtask(q,INPROGRESS,10)
  while genome:
    print genome
Exemple #3
0
table = "pythontest"

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

# Drop any existing table first so every run starts from an empty table.
if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

# print() with a single argument behaves the same on Python 2 and Python 3.
print("Ingesting some data ...")
for num in range(1, 100):
    label = '%03d' % num
    mut = Mutation('r_%s' % label)
    # Two columns per row (cq1 and cq2) carrying the same value.
    for qualifier in ('cq1', 'cq2'):
        mut.put(cf='cf_%s' % label, cq=qualifier, val='value_%s' % label)
    wr.add_mutation(mut)
wr.close()


print("Rows 001 through 003 ...")
for entry in conn.scan(table, scanrange=Range(srow='r_001', erow='r_003'), cols=[]):
    print(entry)

print("Rows 001 and 011 ...")
for entry in conn.batch_scan(table, scanranges=[Range(srow='r_001', erow='r_001'), Range(srow='r_011', erow='r_011')]):
    print(entry)

conn.close()
# Store the file one line per row: the row key is the 1-based line number and
# the stripped line text goes under column family "e".
linenum = 0
with open(license_file) as infile:
    for linenum, line in enumerate(infile, 1):
        m = Mutation(str(linenum))
        m.put(cf="e", cq="", val=line.strip())
        wr.add_mutation(m)
wr.close()

# Three value filters at increasing priorities, all passed to the same scan.
regex1, regex2, regex3 = [
    RegExFilter(priority=priority,
                val_regex=pattern,
                match_substring=True,
                name=filter_name)
    for priority, pattern, filter_name in (
        (21, ".*stated.*", "RegExFilter1"),
        (22, '.*patent', "RegExFilter2"),
        (23, 'have made', "RegExFilter3"),
    )
]

filters = [regex1, regex2, regex3]
for e in conn.batch_scan(table, cols=[["e"]], iterators=filters):
    print(e)

conn.close()
table = "Plenario_data"

# Sample polygon, kept for reference:
# poly = Polygon([(37.795542, -122.423058), 
#                (37.800019, -122.398853), 
#                (37.789302, -122.38821), 
#                (37.7737, -122.39542), 
#                (37.770036, -122.417736)])
now_latlong_1 = "37.795542, -122.423058"
now_latlong_2 = "37.800019, -122.398853"
now_latlong_3 = "37.789302, -122.38821"
now_latlong_4 = "37.78121, -122.39212"
now_latlong_5 = "37.770036, -122.417736"

# Run 20 queries, each against a polygon built from simulate_try().
for i in range(0, 20):
    poly = Polygon(simulate_try())

    # Scan only the row range returned by geohash_min_max(poly) (not the
    # entire table), using 10 threads, and print the entries whose decoded
    # row key lies inside the polygon.
    count = 0
    min_gh, max_gh = geohash_min_max(poly)

    for entry in conn.batch_scan(table, numthreads=10,
            scanranges=[Range(srow=min_gh, erow=max_gh)]):
        count += 1
        if count % 10000 == 0:
            # Progress marker every 10000 scanned entries.
            print(count)
        if poly.contains(Point(*geohash.decode(entry.row))):
            # Explicit formatting keeps the space-separated output identical
            # on Python 2 and Python 3.
            print("%s %s %s %s" % (entry.row, entry.cf, entry.ts, entry.val))
    print(poly.bounds)
    print(poly.area)
    print("Done With One Query %d!!!" % i)
class DBConnection:
    """ The interface to Accumulo.

    Holds the connection settings and, once connect() has been called, the
    Accumulo connection used by query().
    """

    __slots__ = ['_host','_port','_user','_password','_conn']

    def __init__(self, host, port, user, password):
        """Store the connection settings; no connection is opened here."""
        self._host = host
        self._port = port
        self._user = user
        self._password = password
        # Set by connect(). Initialised here so the attribute always exists
        # and query() can report a missing connection clearly.
        self._conn = None

    def connect(self):
        """Open the Accumulo connection using the stored settings."""
        self._conn = Accumulo(host=self._host, port=self._port, user=self._user, password=self._password)

    def query(self, plenario_session_state):
        """Count observations per (dataset name, truncated observation date).

        Args:
            plenario_session_state: object providing get_dataset(),
                get_ngon(), get_start_date(), get_end_date() (both parsed
                as "%m/%d/%Y") and get_date_aggr() (passed to truncate()).

        Returns:
            dict mapping (dataset_name, ISO-formatted truncated date) to the
            number of matching entries.

        Raises:
            Exception: if the dataset is not "dat_master", no N-gon is
                selected, or the N-gon does not have exactly 5 points.
            RuntimeError: if connect() has not been called.
            ValueError: if a date string does not match its expected format.
            KeyError: if an entry lacks 'obs_date', or a matching entry
                lacks 'dataset_name'.
        """
        dataset = plenario_session_state.get_dataset()
        if dataset != "dat_master":
            raise Exception("Querying currently only supported on dat_master!")

        ngon = plenario_session_state.get_ngon()
        if ngon is None:
            raise Exception("You must have an N-gon selected!")
        if len(ngon) != 5:
            raise Exception("Querying currently only supported for a 5-gon!")

        # Parse the temporal bounds before scanning so malformed input fails
        # fast instead of after the whole table range has been read.
        start_date = datetime.strptime(plenario_session_state.get_start_date(), "%m/%d/%Y")
        end_date = datetime.strptime(plenario_session_state.get_end_date(), "%m/%d/%Y")
        date_aggr = plenario_session_state.get_date_aggr()

        if self._conn is None:
            raise RuntimeError("Not connected; call connect() before query()")

        # NOTE(review): the selected N-gon is validated above but is not yet
        # used for filtering. The scan range runs from the geohash of
        # (-89.9, -179.9) to that of (89.9, 179.9), and the polygon below is
        # the fixed rectangle (-90,-180)..(90,180), as in the original code.
        min_gh = geohash.encode(-89.9,-179.9)
        max_gh = geohash.encode(89.9,179.9)
        poly = Polygon([(-90,-180),(90,-180),(90,180),(-90,180)])

        # Group the key-value cells that belong to the same entry (same
        # column qualifier). The row key of the first cell seen for an entry
        # is stored under 'ghash'.
        rows = {}
        for cell in self._conn.batch_scan("dat_master", numthreads=10,
                                          scanranges=[Range(srow=min_gh, erow=max_gh)]):
            entry = rows.setdefault(cell.cq, {'ghash': cell.row})
            entry[cell.cf] = cell.val

        counts = {}
        # .values() works on both Python 2 and Python 3 (iteritems() does not).
        for entry in rows.values():
            obs_date = datetime.strptime(entry['obs_date'], "%Y-%m-%d %H:%M:%S")

            # Temporal filter: keep entries inside [start_date, end_date].
            if not (start_date <= obs_date <= end_date):
                continue

            # Spatial filter: keep entries whose decoded geohash lies within
            # the polygon.
            if not poly.contains(Point(geohash.decode(entry['ghash']))):
                continue

            # Truncate the date as specified by date_aggr and count the
            # size of each (dataset, date) group.
            group = (entry['dataset_name'], truncate(obs_date, date_aggr).isoformat())
            counts[group] = counts.get(group, 0) + 1

        return counts
Exemple #7
0
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo
from pyaccumulo.iterators import *

import settings
import sys

# Usage: <script> <table> <term> <term> [<term> ...]
# Check the arguments before connecting so a missing table name produces a
# usage message instead of an IndexError.
if len(sys.argv) < 2:
    print("Usage: %s <table> <term> <term> [<term> ...]" % sys.argv[0])
    sys.exit(1)

conn = Accumulo(host=settings.HOST,
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

try:
    table = sys.argv[1]
    if not conn.table_exists(table):
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms of three characters or fewer are ignored.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    # Scan the table through an IndexedDocIterator built from the search
    # terms and print the value of every entry it returns.
    for e in conn.batch_scan(
            table, iterators=[IndexedDocIterator(priority=21,
                                                 terms=search_terms)]):
        print(e.val)
finally:
    # Runs on every exit path, including the sys.exit() calls above.
    conn.close()
Exemple #8
0
import settings
conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

# Recreate the table so every run starts from an empty table.
table = "regexes"
if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

wr = conn.create_batch_writer(table)

license_file = "LICENSE"
linenum = 0

# open() replaces the Python 2-only file() builtin. Each line is stored in its
# own row, keyed by its 1-based line number, under column family "e".
with open(license_file) as infile:
    for linenum, line in enumerate(infile, 1):
        m = Mutation(str(linenum))
        m.put(cf="e", cq="", val=line.strip())
        wr.add_mutation(m)
wr.close()

# Three value filters at increasing priorities, all passed to the same scan.
regex1 = RegExFilter(priority=21, val_regex=".*stated.*", match_substring=True, name="RegExFilter1")
regex2 = RegExFilter(priority=22, val_regex='.*patent', match_substring=True, name="RegExFilter2")
regex3 = RegExFilter(priority=23, val_regex='have made', match_substring=True, name="RegExFilter3")

for e in conn.batch_scan(table, cols=[["e"]], iterators=[regex1, regex2, regex3]):
    # print() with a single argument behaves the same on Python 2 and 3.
    print(e)

conn.close()
Exemple #9
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *

from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re
import settings
import sys

# Usage: <script> <table> <term> <term> [<term> ...]
# Check the arguments before connecting so a missing table name produces a
# usage message instead of an IndexError.
if len(sys.argv) < 2:
    print("Usage: %s <table> <term> <term> [<term> ...]" % sys.argv[0])
    sys.exit(1)

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

try:
    table = sys.argv[1]
    if not conn.table_exists(table):
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms of three characters or fewer are ignored.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    # Scan the table through an IndexedDocIterator built from the search
    # terms and print the value of every entry it returns.
    for e in conn.batch_scan(table, iterators=[IndexedDocIterator(priority=21, terms=search_terms)]):
        print(e.val)
finally:
    # Runs on every exit path, including the sys.exit() calls above.
    conn.close()
Exemple #10
0
from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re
import settings
import sys

# Usage: <script> <table> <term> <term> [<term> ...]
# Check the arguments before connecting so a missing table name produces a
# usage message instead of an IndexError.
if len(sys.argv) < 2:
    print("Usage: %s <table> <term> <term> [<term> ...]" % sys.argv[0])
    sys.exit(1)

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

try:
    table = sys.argv[1]
    if not conn.table_exists(table):
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms of three characters or fewer are ignored.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]

    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    # First pass: scan rows "s".."t" through an IntersectingIterator built
    # from the search terms and collect the column qualifiers of the hits.
    uuids = [
        e.cq
        for e in conn.batch_scan(
            table,
            scanranges=[Range(srow="s", erow="t")],
            iterators=[IntersectingIterator(priority=21, terms=search_terms)])
    ]

    # Second pass: scan each collected qualifier as a row key and print the
    # stored value.
    if uuids:
        for doc in conn.batch_scan(table, scanranges=[Range(srow=uuid, erow=uuid) for uuid in uuids]):
            print(doc.val)
    else:
        print("No results found")
finally:
    # Runs on every exit path, including the sys.exit() calls above.
    conn.close()