def get_ontology_s2s(aliasc, opcode, usr):
    # consider the purposes in the filters when alias == None
    rts = []
    cl4 = index.create_index_expression(column_name='obj_dest', value=str(aliasc))
    cl5 = index.create_index_expression(column_name='opcode', value=str(opcode))
    clausec = index.create_index_clause([cl4, cl5], count=1000000)
    resultSet = tb_relaction.get_indexed_slices(clausec)
    for ky, resultsC in resultSet:
        ido = resultsC[u'obj_orig']
        ido2 = resultsC[u'opcode']
        ido3 = resultsC[u'foco']
        ido4 = resultsC[u'foco_d']
        ido5 = resultsC[u'cond']
        #===
        if ido2 is None:
            ido2 = ''
        if ido3 is None:
            ido3 = ''
        if ido4 is None:
            ido4 = ''
        if ido5 is None:
            ido5 = ''
        obj_principal = mdTb.get_object_by_data(ido, ido)
        rts.append(obj_principal)
    return rts
def fetch(self, model, specs, number):
    expressions = []
    for spec in specs:
        if isinstance(spec.value_spec, EQ):
            expressions.append(
                create_index_expression(spec.attr, spec.value_spec.value))
        elif isinstance(spec.value_spec, GT):
            expressions.append(create_index_expression(
                spec.attr, spec.value_spec.value, index.GT))
        elif isinstance(spec.value_spec, LT):
            expressions.append(create_index_expression(
                spec.attr, spec.value_spec.value, index.LT))
        elif isinstance(spec.value_spec, GTE):
            expressions.append(create_index_expression(
                spec.attr, spec.value_spec.value, index.GTE))
        elif isinstance(spec.value_spec, LTE):
            expressions.append(create_index_expression(
                spec.attr, spec.value_spec.value, index.LTE))
    cfm = ColumnFamilyMap(model, pool, model.cf_name)
    clause = create_index_clause(expressions, count=number)

    def value_only(lst):
        values = []
        for key, value in lst:
            values.append(value)
        return values

    return value_only(cfm.get_indexed_slices(clause))
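# A minimal usage sketch for fetch() above, assuming hypothetical Spec/EQ/GT
# value-spec classes (modeled on the isinstance checks inside fetch) and a
# Person model with cf_name set; none of these names come from pycassa itself.
class EQ(object):
    def __init__(self, value):
        self.value = value

class GT(object):
    def __init__(self, value):
        self.value = value

class Spec(object):
    def __init__(self, attr, value_spec):
        self.attr = attr
        self.value_spec = value_spec

specs = [Spec('state', EQ('TX')), Spec('birthdate', GT(19800101))]
people = store.fetch(Person, specs, number=50)  # store/Person are assumptions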
def get_top_level(obj, foc, usr, termo_s):
    rts = []
    cl1 = index.create_index_expression(column_name='OBJECT', value=obj)
    cl2 = index.create_index_expression(column_name='TOPICO', value=foc)
    cl3 = index.create_index_expression(column_name='USERNAME', value=usr)
    cl4 = index.create_index_expression(column_name='UID', value=termo_s)
    clausec = index.create_index_clause([cl1, cl2, cl3, cl4], count=1000000)
    rest = tb_object_dt.get_indexed_slices(clausec)
    for kl, cols in rest:
        i = cols[u'lev']
        id_top = cols[u'id_top']
        rts.append([i, id_top])
    return rts
def test_insert_get_indexed_slices(self):
    instance1 = TestIndex()
    instance1.key = 'key1'
    instance1.birthdate = 1L
    self.indexed_map.insert(instance1)

    instance2 = TestIndex()
    instance2.key = 'key2'
    instance2.birthdate = 1L
    self.indexed_map.insert(instance2)

    instance3 = TestIndex()
    instance3.key = 'key3'
    instance3.birthdate = 2L
    self.indexed_map.insert(instance3)

    expr = index.create_index_expression(column_name='birthdate', value=2L)
    clause = index.create_index_clause([expr])

    # test with passing an instance
    result = self.indexed_map.get_indexed_slices(instance1, index_clause=clause)
    assert_equal(len(result), 2)
    assert_equal(result.get('key1'), instance1)
    assert_equal(result.get('key2'), instance2)

    # test without passing an instance
    result = self.indexed_map.get_indexed_slices(index_clause=clause)
    assert_equal(len(result), 1)
    assert_equal(result.get('key3'), instance3)
def test_get_indexed_slices_batching(self):
    indexed_cf = ColumnFamily(self.client, 'Indexed1')

    columns = {'birthdate': 1L}
    for i in range(200):
        indexed_cf.insert('key%d' % i, columns)

    expr = index.create_index_expression(column_name='birthdate', value=1L)

    clause = index.create_index_clause([expr], count=10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
    assert_equal(len(result), 10)

    clause = index.create_index_clause([expr], count=250)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
    assert_equal(len(result), 200)
def test_get_indexed_slices_batching(self):
    indexed_cf = ColumnFamily(pool, 'Indexed1')

    columns = {'birthdate': 1L}
    for i in range(200):
        indexed_cf.insert('key%d' % i, columns)

    expr = index.create_index_expression(column_name='birthdate', value=1L)

    clause = index.create_index_clause([expr], count=10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
    assert_equal(len(result), 10)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
    assert_equal(len(result), 10)

    clause = index.create_index_clause([expr], count=250)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
    assert_equal(len(result), 200)
    result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
    assert_equal(len(result), 200)
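# Hedged note on buffer_size, as exercised by the two batching tests above:
# get_indexed_slices() streams rows from Cassandra in pages of buffer_size,
# so the clause's count caps the total row count while buffer_size only
# changes the number of round trips. Assumes the Indexed1 column family and
# imports from the tests above.
expr = index.create_index_expression('birthdate', 1L)
clause = index.create_index_clause([expr], count=25)
for key, cols in indexed_cf.get_indexed_slices(clause, buffer_size=5):
    pass  # rows arrive five at a time; at most 25 in total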
def test_insert_get_indexed_slices(self):
    instance1 = TestIndex()
    instance1.key = 'key1'
    instance1.birthdate = 1L
    self.indexed_map.insert(instance1)

    instance2 = TestIndex()
    instance2.key = 'key2'
    instance2.birthdate = 1L
    self.indexed_map.insert(instance2)

    instance3 = TestIndex()
    instance3.key = 'key3'
    instance3.birthdate = 2L
    self.indexed_map.insert(instance3)

    expr = index.create_index_expression(column_name='birthdate', value=2L)
    clause = index.create_index_clause([expr])
    result = self.indexed_map.get_indexed_slices(index_clause=clause)

    count = 0
    for instance in result:
        assert_equal(instance, instance3)
        count += 1
    assert_equal(count, 1)
def test_insert_get_indexed_slices(self):
    indexed_cf = ColumnFamily(self.client, 'Indexed1')

    columns = {'birthdate': 1L}

    key = 'key1'
    indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE)
    key = 'key2'
    indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE)
    key = 'key3'
    indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE)

    expr = index.create_index_expression(column_name='birthdate', value=1L)
    clause = index.create_index_clause([expr])
    result = indexed_cf.get_indexed_slices(clause)
    assert len(result) == 3
    assert result.get('key1') == columns
    assert result.get('key2') == columns
    assert result.get('key3') == columns
def get_user_by_nick(self, user_nick):
    nick_expression = create_index_expression('nick', user_nick)
    clause = create_index_clause([nick_expression], count=1)
    user_dict = None
    for key, user in user_cf.get_indexed_slices(clause):
        user_dict = msgpack.unpackb(zlib.decompress(user.get("data")))
    return user_dict
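# A hedged sketch of the write path that get_user_by_nick() above implies:
# the 'nick' and 'data' columns and user_cf come from the snippet, while
# save_user itself is an assumption. msgpack.packb and zlib.compress mirror
# the unpackb/decompress calls used on read.
import zlib
import msgpack

def save_user(key, nick, user_dict):
    packed = zlib.compress(msgpack.packb(user_dict))
    user_cf.insert(key, {'nick': nick, 'data': packed})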
def authenticatorlist_get_id(self, key):
    global pool
    ret = None
    authenticatorlist = ColumnFamily(pool, 'authenticators_info')
    expr = create_index_expression('token_id', key)
    clause = create_index_clause([expr])
    result = authenticatorlist.get_indexed_slices(clause)
    for keyx, columnx in result:
        ret = keyx
    return ret
def run_process():
    def post_urls(r):
        for lnk in r:
            lnk = parse_url_rt(lnk)
            rt = test_page(lnk)
            if rt == 1:
                # write to the cache_products table
                try:
                    # check whether it is already in the database
                    r = tab2.get(lnk)
                except:
                    # if not, insert it
                    print 'insert into product.table:', lnk
                    try:
                        tab2.insert(lnk, {"INDEXED": 'N'})
                    except:
                        log.exception("ERROR")
            else:
                # write to the cache_links table
                try:
                    # check whether it is already in the database
                    r = tab3.get(lnk)
                except:
                    # if not, insert it
                    print 'insert into link.table:', lnk
                    tab3.insert(lnk, {"INDEXED": 'N'})

    def empty_links():
        r = tab3.get_range()
        for c, i in r:
            return False
        return True

    if empty_links():
        print 'init cache links...'
        lnk_entrada = 'http://www.submarino.com.br/loja/342768/moda-calcados-e-acessorios?sc_m=in|bf|moda|_'
        base_url = 'moda-calcados-e-acessorios'
        r = parse_url(lnk_entrada, base_url)
        post_urls(r)
        return True
    else:
        # open the links table and run each link
        # with INDEXED = 'N' in cache_links
        print 'implement cache links...'
        base_url = 'moda-calcados-e-acessorios'
        cl4 = index.create_index_expression(column_name='INDEXED', value='N')
        clausec = index.create_index_clause([cl4], count=100)
        resultSet = tab3.get_indexed_slices(clausec)
        have = False
        for key1, results in resultSet:
            link = key1
            print 'run.url:', link
            rt = parse_url(link, base_url)
            post_urls(rt)
            have = True
        return have
def authenticator_get_token_id(self, key):
    # given an atoken column value, return the corresponding token id (row key)
    global pool
    ret = None
    authenticator = ColumnFamily(pool, 'authenticator')
    expr = create_index_expression('atoken', key)
    clause = create_index_clause([expr])
    result = authenticator.get_indexed_slices(clause)
    for keyx, columnx in result:
        ret = keyx
    return ret
def network_get_id_viaSession(self, key):
    # given a session id column value, return the corresponding network id (row key)
    global pool
    ret = None
    network = ColumnFamily(pool, 'network_info')
    expr = create_index_expression('session_id', key)
    clause = create_index_clause([expr])
    result = network.get_indexed_slices(clause)
    for keyx, columnx in result:
        ret = keyx
    return ret
def handle_get_id_viaNetwork(self, key):
    # given a network item id column value, return the corresponding handle id (row key)
    global pool
    ret = None
    token = ColumnFamily(pool, 'handle')
    expr = create_index_expression('network_item_id', key)
    clause = create_index_clause([expr])
    result = token.get_indexed_slices(clause)
    for keyx, columnx in result:
        ret = keyx
    return ret
def get_nodes_by_attr(self, type, attrs={}, expressions=None, start_key="",
                      row_count=2147483647, **kwargs):
    if expressions is None:
        expressions = []
    for attr, value in self.serialize_columns(attrs).items():
        expressions.append(index.create_index_expression(attr, value))
    clause = index.create_index_clause(expressions, start_key=start_key,
                                       count=row_count)
    try:
        column_family = self.delegate.get_cf(type)
        rows = column_family.get_indexed_slices(clause, **kwargs)
    except NotFoundException:
        raise NodeNotFoundException()
    return [prim.Node(self, type, key, self.deserialize_value(values))
            for key, values in rows]
def test_get_indexed_slices(self):
    sys = SystemManager()
    for cf, keys in self.type_groups:
        sys.create_index(TEST_KS, cf.column_family, 'birthdate', LongType())
        cf = ColumnFamily(pool, cf.column_family)
        for key in keys:
            cf.insert(key, {'birthdate': 1})
        expr = create_index_expression('birthdate', 1)
        clause = create_index_clause([expr])
        rows = list(cf.get_indexed_slices(clause))
        assert_equal(len(rows), len(keys))
        for k, c in rows:
            assert_true(k in keys)
            assert_equal(c, {'birthdate': 1})
def run_process():
    def post_urls(r):
        for lnk in r:
            lnk = parse_url_rt(lnk)
            rt = test_page(lnk)
            if rt == 1:
                # write to the cache_products table
                try:
                    # check whether it is already in the database
                    r = tab2.get(lnk)
                except:
                    # if not, insert it
                    print 'insert into product.table:', lnk
                    tab2.insert(lnk, {"INDEXED": 'N'})
            else:
                # write to the cache_links table
                try:
                    # check whether it is already in the database
                    r = tab3.get(lnk)
                except:
                    # if not, insert it
                    print 'insert into link.table:', lnk
                    tab3.insert(lnk, {"INDEXED": 'N'})

    def empty_links():
        r = tab3.get_range()
        for c, i in r:
            return False
        return True

    if empty_links():
        print 'init cache links...'
        #post_urls(['http://www.extra.com.br/Moda/AcessoriosdeModa/OculosdeSol/HB-Carvin-Round---9009911424---Marrom-Bege-2032448.html?recsource=busca-col&rectype=col-14780-1734'])
        lnk_entrada = 'http://www.extra.com.br/moda/?Filtro=C1734'
        base_url = 'http://www.extra.com.br/moda/'
        r = parse_url(lnk_entrada, base_url)
        post_urls(r)
        return True
    else:
        # open the links table and run each link
        # with INDEXED = 'N' in cache_links
        print 'implement cache links...'
        base_url = 'http://www.extra.com.br/moda/'
        cl4 = index.create_index_expression(column_name='INDEXED', value='N')
        clausec = index.create_index_clause([cl4], count=100)
        resultSet = tab3.get_indexed_slices(clausec)
        have = False
        for key1, results in resultSet:
            link = key1
            print 'run.url:', link
            rt = parse_url(link, base_url)
            post_urls(rt)
            have = True
        return have
def test_insert_get_indexed_slices(self):
    instance = TestIndex()
    instance.key = 'key'
    instance.birthdate = 1L
    self.indexed_map.insert(instance)
    instance.key = 'key2'
    self.indexed_map.insert(instance)
    instance.key = 'key3'
    self.indexed_map.insert(instance)

    expr = index.create_index_expression(column_name='birthdate', value=1L)
    clause = index.create_index_clause([expr])
    result = self.indexed_map.get_indexed_slices(instance, index_clause=clause)
    assert len(result) == 3
    assert result.get('key3') == instance
def test_insert_get_indexed_slices(self):
    instance = TestIndex()
    instance.key = 'key'
    instance.birthdate = 1L
    self.indexed_map.insert(instance)
    instance.key = 'key2'
    self.indexed_map.insert(instance)
    instance.key = 'key3'
    self.indexed_map.insert(instance)

    expr = index.create_index_expression(column_name='birthdate', value=1L)
    clause = index.create_index_clause([expr])
    result = self.indexed_map.get_indexed_slices(instance, index_clause=clause)
    assert_equal(len(result), 3)
    assert_equal(result.get('key3'), instance)
def queue_list(self, application_name, limit=100, offset=None):
    """Return a list of queues"""
    cl = (self.cl or LOCAL_QUORUM) if self.multi_dc else QUORUM
    app_expr = create_index_expression('application', application_name)
    if offset:
        offset = '%s:%s' % (application_name, offset)
        clause = create_index_clause([app_expr], start_key=offset, count=limit)
    else:
        clause = create_index_clause([app_expr], count=limit)
    results = self.queue_fam.get_indexed_slices(
        clause, columns=['application'], read_consistency_level=cl)

    # Pull off the application name in front
    app_len = len(application_name) + 1
    return [key[app_len:] for key, _ in results]
def get_nodes_by_attr(self, type, attrs={}, expressions=None, start_key='',
                      row_count=2147483647, **kwargs):
    if expressions is None:
        expressions = []
    for attr, value in self.serialize_columns(attrs).items():
        expressions.append(index.create_index_expression(attr, value))
    clause = index.create_index_clause(expressions, start_key=start_key,
                                       count=row_count)
    try:
        column_family = self.delegate.get_cf(type)
        rows = column_family.get_indexed_slices(clause, **kwargs)
    except NotFoundException:
        raise NodeNotFoundException()
    return [prim.Node(self, type, key, self.deserialize_value(values))
            for key, values in rows]
def queue_list(self, application_name, limit=100, offset=None):
    """Return a list of queues"""
    cl = (self.cl or LOCAL_QUORUM) if self.multi_dc else QUORUM
    app_expr = create_index_expression('application', application_name)
    if offset:
        offset = '%s:%s' % (application_name, offset)
        clause = create_index_clause([app_expr], start_key=offset, count=limit)
    else:
        clause = create_index_clause([app_expr], count=limit)
    results = self.queue_fam.get_indexed_slices(clause,
                                                columns=['application'],
                                                read_consistency_level=cl)

    # Pull off the application name in front
    app_len = len(application_name) + 1
    return [key[app_len:] for key, _ in results]
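# A paging sketch over queue_list() above, assuming `backend` is an instance
# of the surrounding class. Because the offset is turned into an index-clause
# start_key (which is inclusive), the boundary row can reappear on the next
# page, so we drop it when it matches the previous offset.
offset = None
while True:
    page = backend.queue_list('app1', limit=100, offset=offset)
    if offset is not None and page and page[0] == offset:
        page = page[1:]  # drop the inclusive boundary row
    if not page:
        break
    for queue_name in page:
        handle(queue_name)  # handle() is a placeholder
    offset = page[-1]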
def get_to_index():
    global all_index
    cl4 = index.create_index_expression(column_name='indexed', value='N')
    clausec = index.create_index_clause([cl4], count=10)
    rg = wb2.get_indexed_slices(clausec)
    #
    cnt = 0
    for ky, col in rg:
        id_pg = ky
        col['indexed'] = 'S'
        wb2.insert(ky, col)
        #
        run(str(id_pg))
        #
        cnt += 1
        print 'page:', all_index
        all_index += 1
    return cnt
def insert_insert_get_indexed_slices(self):
    indexed_cf = ColumnFamily(pool, 'Indexed1')

    columns = {'birthdate': 1L}

    keys = []
    for i in range(1, 4):
        indexed_cf.insert('key%d' % i, columns)
        keys.append('key%d' % i)

    expr = index.create_index_expression(column_name='birthdate', value=1L)
    clause = index.create_index_clause([expr])

    count = 0
    for key, cols in indexed_cf.get_indexed_slices(clause):
        assert_equal(cols, columns)
        assert key in keys
        count += 1
    assert_equal(count, 3)
def insert_insert_get_indexed_slices(self):
    indexed_cf = ColumnFamily(pool, "Indexed1")

    columns = {"birthdate": 1L}

    keys = []
    for i in range(1, 4):
        indexed_cf.insert("key%d" % i, columns)
        keys.append("key%d" % i)

    expr = index.create_index_expression(column_name="birthdate", value=1L)
    clause = index.create_index_clause([expr])

    count = 0
    for key, cols in indexed_cf.get_indexed_slices(clause):
        assert_equal(cols, columns)
        assert key in keys
        count += 1
    assert_equal(count, 3)
def insert_insert_get_indexed_slices(self):
    indexed_cf = ColumnFamily(self.client, 'Indexed1')

    columns = {'birthdate': 1L}

    keys = []
    for i in range(1, 4):
        indexed_cf.insert('key%d' % i, columns)
        keys.append('key%d' % i)

    expr = index.create_index_expression(column_name='birthdate', value=1L)
    clause = index.create_index_clause([expr])

    count = 0
    for key, cols in indexed_cf.get_indexed_slices(clause):
        assert cols == columns
        assert key in keys
        count += 1
    assert_equal(count, 3)
def insert_insert_get_indexed_slices(self):
    columns = {'birthdate': 1L}

    keys = set()
    for i in range(1, 4):
        indexed_cf.insert('key%d' % i, columns)
        indexed_cf_stub.insert('key%d' % i, columns)
        keys.add('key%d' % i)

    expr = index.create_index_expression(column_name='birthdate', value=1L)
    clause = index.create_index_clause([expr])

    for test_indexed_cf in (indexed_cf, indexed_cf_stub):
        count = 0
        for key, cols in test_indexed_cf.get_indexed_slices(clause):
            assert_equal(cols, columns)
            assert key in keys
            count += 1
        assert_equal(count, 3)
def get_by(cls, attribute, value):
    """Only works for columns indexed in Cassandra.

    This means that the property must be in the __indexes__ attribute.

    :param attribute: The attribute to look up. This argument is always
        provided by the partial method.
    :param value: The value to match.

    Returns a list of matched objects.
    """
    col_fam = ColumnFamily(cls.pool, cls.__column_family__)
    clause = create_index_clause([create_index_expression(attribute, value)])
    idx_slices = col_fam.get_indexed_slices(clause)
    result = []
    for rowkey, columns in idx_slices:
        result.append(cls(rowkey, **columns))
    return result
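# A usage sketch for get_by() above, hinting at the "partial method" the
# docstring mentions; the User model, its pool, and the 'email' index are
# assumptions. functools.partial binds the attribute name so callers only
# pass the value to match.
from functools import partial

class User(Model):  # Model is assumed to define get_by as a classmethod
    __column_family__ = 'users'
    __indexes__ = ('email',)

User.get_by_email = partial(User.get_by, 'email')
matches = User.get_by_email('alice@example.com')  # list of User objects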
def test_insert_get_indexed_slices(self):
    instance1 = TestIndex()
    instance1.key = "key1"
    instance1.birthdate = 1L
    self.indexed_map.insert(instance1)

    instance2 = TestIndex()
    instance2.key = "key2"
    instance2.birthdate = 1L
    self.indexed_map.insert(instance2)

    instance3 = TestIndex()
    instance3.key = "key3"
    instance3.birthdate = 2L
    self.indexed_map.insert(instance3)

    expr = index.create_index_expression(column_name="birthdate", value=2L)
    clause = index.create_index_clause([expr])
    result = self.indexed_map.get_indexed_slices(index_clause=clause)
    assert_equal(len(result), 1)
    assert_equal(result.get("key3"), instance3)
def execute(self):
    client = db_connection.get_client()
    cf = ColumnFamily(client, self.domain)
    try:
        #### SELECT QUERY ####
        if self.op_type == CassandraQuery.OP_SELECT:
            if not self.where_node:
                # treat this as a simple key get query
                if self.limit == 1:
                    result = cf.get(self.offset)
                    if result:
                        return (True, result, None)
                    else:
                        return (False, None,
                                DatabaseError("No " + self.domain +
                                              " entry matching row_key: " +
                                              self.offset))
                else:
                    return (False, None,
                            DatabaseError("Limit for SELECT operation must be 1"))
            else:
                # treat this as an indexed_slices query
                if self.limit == 1:
                    # we assume there is only a single AND node with
                    # filtering children
                    index_expressions = []
                    for field_predicate, value in self.where_node.children:
                        field_predicate_list = field_predicate.split("__")
                        field = field_predicate_list[0]
                        predicate = EQ
                        if len(field_predicate_list) == 2:
                            try:
                                predicate = SelectManager.predicate_map[
                                    field_predicate_list[1]]
                            except:
                                predicate = EQ
                        index_exp = create_index_expression(field, value, predicate)
                        index_expressions.append(index_exp)

                    index_clause = create_index_clause(
                        index_expressions, start_key=self.offset, count=self.limit)
                    result = cf.get_indexed_slices(index_clause)
                    if result:
                        return (True, result, None)
                    else:
                        return (False, None,
                                DatabaseError("No " + self.domain +
                                              " entry matching query: " +
                                              self.where_node))
                else:
                    return (False, None,
                            DatabaseError("Limit for SELECT operation must be 1"))

        #### FETCH QUERY ####
        elif self.op_type == CassandraQuery.OP_FETCH:
            if self.limit > SelectManager.MAX_FETCH_LIMIT:
                return (False, None,
                        DatabaseError("LIMIT for FETCH operation exceeds "
                                      "MAX_FETCH_LIMIT(1000)"))

            if not self.where_node:
                # treat this as a key range query
                key_offset = self.offset
                limit = self.limit
                result = {}
                while True:
                    if limit < SelectManager.REGULAR_FETCH_LIMIT:
                        res = cf.get_range(key_offset, row_count=limit)
                        result.update(res)
                        break
                    else:
                        res = cf.get_range(
                            key_offset,
                            row_count=SelectManager.REGULAR_FETCH_LIMIT)
                        result.update(res)
                        if len(res) < SelectManager.REGULAR_FETCH_LIMIT:
                            break
                        else:
                            max_key = sorted(res.keys(), reverse=True)[0]
                            key_offset = max_key + 1
                            limit -= SelectManager.REGULAR_FETCH_LIMIT
                return (True, result, None)
            else:
                # treat this as a fetch query:
                # first create the index expressions
                index_expressions = []
                for field_predicate, value in self.where_node.children:
                    field_predicate_list = field_predicate.split("__")
                    field = field_predicate_list[0]
                    predicate = EQ
                    if len(field_predicate_list) == 2:
                        try:
                            predicate = SelectManager.predicate_map[
                                field_predicate_list[1]]
                        except:
                            predicate = EQ
                    index_exp = create_index_expression(field, value, predicate)
                    index_expressions.append(index_exp)

                key_offset = self.offset
                limit = self.limit
                result = {}
                while True:
                    if limit < SelectManager.REGULAR_FETCH_LIMIT:
                        index_clause = create_index_clause(
                            index_expressions, start_key=key_offset, count=limit)
                        res = cf.get_indexed_slices(index_clause)
                        result.update(res)
                        break
                    else:
                        index_clause = create_index_clause(
                            index_expressions, start_key=key_offset,
                            count=SelectManager.REGULAR_FETCH_LIMIT)
                        res = cf.get_indexed_slices(index_clause)
                        result.update(res)
                        if len(res) < SelectManager.REGULAR_FETCH_LIMIT:
                            break
                        else:
                            max_key = sorted(res.keys(), reverse=True)[0]
                            key_offset = max_key + 1
                            limit -= SelectManager.REGULAR_FETCH_LIMIT
                return (True, result, None)
    except Exception, ex:
        return (False, None, ex)
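# A plausible shape for SelectManager.predicate_map referenced in execute()
# above, mapping Django-style filter suffixes onto pycassa's index operators;
# the exact contents are an assumption, but the operator constants are real
# pycassa.index names.
from pycassa.index import EQ, GT, GTE, LT, LTE

predicate_map = {
    'exact': EQ,
    'gt': GT,
    'gte': GTE,
    'lt': LT,
    'lte': LTE,
}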
def get_fuzzy2(name, user):
    '''
    '''
    affinity = name
    #==
    cl4 = index.create_index_expression(column_name='layout_onto', value=affinity)
    clausec = index.create_index_clause([cl4], count=1000000)
    resultSet2 = tb_fuzzy.get_indexed_slices(clausec)
    resultSet = []
    aresults = []
    for ky, re in resultSet2:
        #
        fzname = re[u'fzname']
        force_position = re[u'force_position']
        mandatory = re[u'mandatory']
        direction = re[u'direction']
        an = re[u'an']
        sq = int(re[u'sq'])
        resultSet.append(
            [ky, fzname, force_position, mandatory, direction, an, sq])
    #
    resultSet = sorted(resultSet, key=itemgetter(6), reverse=True)
    #
    for results in resultSet:
        [ky, fzname, force_position, mandatory, direction, an, sq] = results
        #print 'Get-FZ:',fzname
        force_position = (umisc.trim(force_position).upper() == "Y" or
                          umisc.trim(force_position).upper() == "S")
        mandatory = (umisc.trim(mandatory).upper() == "Y" or
                     umisc.trim(mandatory).upper() == "S")
        direction = umisc.trim(direction).upper()
        f_an = umisc.trim(an).upper()
        #===--------------------------------------------------
        referer = []
        start_i = 0
        while True:
            start_i += 1
            try:
                r1 = tb_fz_store_refer.get(fzname + "|" + str(start_i))
            except:
                break
            referer.append(r1[u'refer'])
        #===--------------------------------------------------
        #sqlr="SELECT trim(defs),trim(sin_ret),trim(vl_ret),trim( special_direct ) FROM fz_store_defs where fz='"+fzname+"' and username='******' "
        #print sqlr
        has_break = False
        breaks = []
        arround = []
        DEFS = []
        sinap_result = []
        start_i = 0
        while True:
            start_i += 1
            try:
                results2 = tb_fz_store_defs.get(fzname + "|" + str(start_i))
            except:
                break
            returns = []
            vl_ret1 = results2[u'vl_ret']
            special_direct = results2[u'special_direct']
            if special_direct is None:
                special_direct = ''
            if vl_ret1 is None:
                vl_ret1 = ''
            # format: [topico,sub,sinapse][topico,sub,sinapse][topico,sub,sinapse]
            tuples = []
            tmp = ''
            for s in vl_ret1:
                if s == '[':
                    tmp = ''
                elif s == ']':
                    tuples.append(tmp)
                    tmp = ''
                else:
                    tmp += s
            #print 'tuples:',tuples
            for tup in tuples:
                top = ''
                sub = ''
                sin = ''
                tmp = ''
                for s in tup:
                    if s == ',':
                        if top == '':
                            top = tmp
                            tmp = ''
                        elif sub == '':
                            sub = tmp
                            tmp = ''
                        else:
                            sin = tmp
                            tmp = ''
                    else:
                        tmp += s
                if umisc.trim(tmp) != '':
                    sin = tmp
                if umisc.trim(top) != '':
                    if top == 'break':
                        has_break = True
                    else:
                        returns.append([top, sub, sin, special_direct])
            #print returns,'...'
            #----
            defs1 = results2[u'defs']
            ps = []
            if len(defs1) > 0:
                tmp = ''
                ind = 0
                for d in defs1:
                    if d == ',':
                        if defs1[ind - 1] != '\\':
                            ps.append(tmp)
                            tmp = ''
                        elif d != '\\':
                            tmp += d
                    elif d != '\\':
                        tmp += d
                    ind += 1
                if umisc.trim(tmp) != '':
                    ps.append(tmp)
            else:
                ps.append('')
            if has_break:
                for ss in ps:
                    breaks.append(ss)
            else:
                DEFS.append([ps, returns])
            sin_ret = results2[u'sin_ret']
            if umisc.trim(sin_ret) != '':
                sinap_result.append(sin_ret)
        #===--------------------------------------------------
        PREF = []
        start_i = 0
        while True:
            start_i += 1
            try:
                r1 = tb_fz_store_pref.get(fzname + "|" + str(start_i))
            except:
                break
            pref = r1[u'pref']
            ps = []
            if len(pref) > 0:
                tmp = ''
                ind = 0
                for d in pref:
                    if d == ',':
                        if pref[ind - 1] != '\\':
                            ps.append(tmp)
                            tmp = ''
                        else:
                            tmp += d
                    else:
                        tmp += d
                    ind += 1
                if tmp != '':
                    ps.append(tmp)
            else:
                ps.append('')
            PREF.append(ps)
        #===--------------------------------------------------
        #sqlr="SELECT trim(sufix) FROM fz_store_sufix where fz='"+fzname+"' and username='******' "
        #
        SUFX = []
        start_i = 0
        while True:
            start_i += 1
            try:
                r1 = tb_fz_store_sufix.get(fzname + "|" + str(start_i))
            except:
                break
            sufix = r1[u'sufix']
            ps = []
            if len(sufix) > 0:
                tmp = ''
                ind = 0
                for d in sufix:
                    if d == ',':
                        if sufix[ind - 1] != '\\':
                            ps.append(tmp)
                            tmp = ''
                        else:
                            tmp += d
                    else:
                        tmp += d
                    ind += 1
                if tmp != '':
                    ps.append(tmp)
            else:
                ps.append('')
            SUFX.append(ps)
        #===--------------------------------------------------
        ind = 0
        sents = []
        for cDF in DEFS:
            PR = ['']
            SF = [['']]
            if ind < len(PREF):
                PR = PREF[ind]
            if ind < len(SUFX):
                SF = SUFX
            DEF = cDF
            sent = [PR, DEF, SF]
            sents.append(sent)
            ind += 1
        aresults.append([
            fzname, sents, mandatory, referer, force_position, arround,
            sinap_result, direction, f_an
        ])
    return aresults
import pycassa
from pycassa.pool import ConnectionPool
from pycassa import index
from pycassa.columnfamily import ColumnFamily

pool2 = ConnectionPool('MINDNET', ['79.143.185.3:9160'], timeout=10000)
tb_object_dt1 = pycassa.ColumnFamily(pool2, 'SEMANTIC_OBJECT_DT')

r = tb_object_dt1.get('cenario_objs_importantes_ecomm1|1')
print r
print 'datach:', r['datach']

cl4 = index.create_index_expression(column_name="datach",
                                    value="$rule-destination-for-objects")
cl5 = index.create_index_expression(column_name="topico", value="purp-destin")
clausec = index.create_index_clause([cl4, cl5], count=1000000)
resultSetk = tb_object_dt1.get_indexed_slices(clausec)
#=============================================================
for ky, cols in resultSetk:
    obj = cols[u'object']
    print obj
print 'done:-----------------'
#=============================================================
print 'total products:', all_product
lenarg = len(sys.argv)
clearall = 'N'
if lenarg > 1:
    clearall = sys.argv[1]
    clear_dt()
tot_prod = 0
while True:
    cl4 = index.create_index_expression(column_name='INDEXED', value='N')
    clausec = index.create_index_clause([cl4], count=50)
    rg = tab2.get_indexed_slices(clausec)
    #
    matriz_prod = []
    #
    cnt = 0
    for ky, col in rg:
        url = ky
        cnt += 1
        tot_prod += 1
        print 'parse:', cnt, ',total:', tot_prod
        query = url
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5')]
        #
a_lock.acquire()
print 're-open:', ids
try:
    #conn.sql("update fcb_users set indexed='N' where ID=\'"+str(ids)+'\'')
    gt = fcb.get(ids)
    gt[u'indexed'] = 'N'
    fcb.insert(ids, gt)
except:
    print 'Error reopen:'
    log.exception("")
a_lock.release()

global_cnt = 0
restg = []
exprc = index.create_index_expression(column_name='indexed', value='N')
clausec = index.create_index_clause([exprc], count=1000)
print 'Prepare cache:'
indk1 = 1
rest = fcb.get_indexed_slices(clausec)
for kl, cols in rest:
    try:
        ids = cols['id']
        restg.append([kl, ids])
    except Exception, e:
        print 'INFO:Error in ', e
    indk1 += 1
    if indk1 % 500 == 0:
        print 'cnt:', indk1
print 'restg:', len(restg), '->OK'