def extract_links(self, response):
    """Collect the links found in ``response``.

    When ``self.restrict_xpaths`` is set, only the nodes selected by those
    XPath expressions are searched; otherwise the whole
    ``response.selector`` is searched.  Links from every searched node are
    run through ``self._process_links`` and the combined list is passed to
    ``unique_list`` keyed on each link's ``url``.

    :param response: the response to search; its ``url``, ``encoding``,
        ``selector`` and ``xpath()`` are used.
    :return: the result of ``unique_list`` over all collected links.
    """
    base_url = get_base_url(response)

    if not self.restrict_xpaths:
        regions = [response.selector]
    else:
        regions = []
        for expression in self.restrict_xpaths:
            for node in response.xpath(expression):
                regions.append(node)

    collected = []
    for region in regions:
        found = self._extract_links(
            region, response.url, response.encoding, base_url
        )
        collected.extend(self._process_links(found))

    return unique_list(collected, lambda link: link.url)
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 compound_index=1, **kwargs):
    """Render ``select`` as SELECT statement text.

    A ``{'from': ..., 'iswrapper': ...}`` entry is pushed onto
    ``self.stack`` while the statement is rendered and popped again just
    before returning.

    :param select: the select construct to render.
    :param asfrom: forwarded to ``label_select_column``; together with
        ``parens`` it decides whether the result is parenthesized.
    :param parens: when true along with ``asfrom``, the text is wrapped in
        parentheses.
    :param iswrapper: recorded in the pushed stack entry.
    :param compound_index: ``self.result_map`` is handed to column
        rendering when this is 1 and there is no enclosing stack entry,
        or when the enclosing entry is flagged ``iswrapper``.
    :param kwargs: accepted but not used by this implementation.
    :return: the SQL string.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    enclosing = self.stack[-1] if self.stack and self.stack[-1] else {}
    outer_froms = enclosing.get('from', None)

    froms = select._get_display_froms(outer_froms)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate to
    # outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    # ``and`` binds tighter than ``or``; the parentheses only spell that out.
    wants_result_map = (
        (compound_index == 1 and not enclosing)
        or enclosing.get('iswrapper', False)
    )
    column_clause_args = (
        {'result_map': self.result_map} if wants_result_map else {}
    )

    # Render every column first, then drop the None results and let
    # unique_list reduce what is left.
    rendered = [
        self.process(
            self.label_select_column(select, column, asfrom=asfrom),
            within_columns_clause=True,
            **column_clause_args)
        for column in select.inner_columns
    ]
    inner_columns = util.unique_list(
        piece for piece in rendered if piece is not None)

    parts = ["SELECT "]
    if select._prefixes:
        parts.append(
            " ".join(self.process(prefix) for prefix in select._prefixes)
            + " ")
    parts.append(self.get_select_precolumns(select))
    parts.append(', '.join(inner_columns))

    if froms:
        parts.append(" \nFROM ")
        parts.append(
            ', '.join(self.process(source, asfrom=True) for source in froms))
    else:
        parts.append(self.default_from())

    if select._whereclause is not None:
        where_sql = self.process(select._whereclause)
        if where_sql:
            parts.append(" \nWHERE " + where_sql)

    if select._group_by_clause.clauses:
        group_by_sql = self.process(select._group_by_clause)
        if group_by_sql:
            parts.append(" GROUP BY " + group_by_sql)

    if select._having is not None:
        having_sql = self.process(select._having)
        if having_sql:
            parts.append(" \nHAVING " + having_sql)

    if select._order_by_clause.clauses:
        parts.append(self.order_by_clause(select))
    if select._limit is not None or select._offset is not None:
        parts.append(self.limit_clause(select))
    if select.for_update:
        parts.append(self.for_update_clause(select))

    self.stack.pop(-1)

    text = "".join(parts)
    if asfrom and parens:
        return "(" + text + ")"
    return text
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 fromhints=None, compound_index=0, force_result_map=False,
                 positional_names=None, **kwargs):
    """Translate ``select`` into a populated ``resolver.SelectResolver``.

    Instead of producing SQL text, each part of the select (columns, FROM
    objects, WHERE, GROUP BY, ORDER BY, HAVING, limit, offset) is compiled
    and stored on the corresponding attribute of a new ``SelectResolver``.
    A ``{'from': ..., 'iswrapper': ...}`` entry sits on ``self.stack``
    while this happens.

    :param select: the select construct to translate.
    :param asfrom: forwarded to ``_get_display_froms`` and
        ``_label_select_column``.
    :param iswrapper: recorded in the pushed stack entry.
    :param compound_index: the result map is populated when this is 0 and
        there is no enclosing stack entry or the enclosing entry is
        flagged ``iswrapper``.
    :param force_result_map: when true, the result map is populated
        regardless of ``compound_index``.
    :param positional_names: passed along in the column clause arguments.
    :param kwargs: forwarded to every ``_compiler_dispatch`` call and
        merged into the column clause arguments.
    :return: the ``SelectResolver`` instance.

    ``parens`` and ``fromhints`` are accepted but not used here.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    enclosing = self.stack[-1] if self.stack and self.stack[-1] else {}
    outer_froms = enclosing.get('from', None)

    froms = select._get_display_froms(outer_froms, asfrom=asfrom)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate
    # to outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    populate_result_map = force_result_map or (
        compound_index == 0
        and (not enclosing or enclosing.get('iswrapper', False))
    )

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    sel = resolver.SelectResolver()

    # Caller kwargs first, then the fixed column-rendering flags on top.
    column_clause_args = dict(kwargs)
    column_clause_args['positional_names'] = positional_names
    column_clause_args['within_label_clause'] = False
    column_clause_args['within_columns_clause'] = False

    # the actual list of columns to put in the SELECT column list.
    labeled = [
        self._label_select_column(
            select, column, populate_result_map, asfrom, column_clause_args)
        for column in util.unique_list(select.inner_columns)
    ]
    sel.columns.extend([item for item in labeled if item is not None])

    # NOTE(review): the original guarded this loop with ``if froms:``;
    # iterating an empty sequence is a no-op, so the guard is dropped.
    for source in froms:
        sel.dataframes.append(
            source._compiler_dispatch(self, asfrom=True, **kwargs))

    if select._whereclause is not None:
        sel.whereclause = select._whereclause._compiler_dispatch(
            self, **kwargs)

    if select._group_by_clause.clauses:
        sel.group_by = select._group_by_clause._compiler_dispatch(
            self, **kwargs)

    if select._order_by_clause.clauses:
        sel.order_by = select._order_by_clause._compiler_dispatch(
            self, **kwargs)

    if select._having is not None:
        sel.having = select._having._compiler_dispatch(self, **kwargs)

    # NOTE(review): limit/offset are taken to be set unconditionally; the
    # flattened source does not show their nesting -- confirm.
    sel.limit = select._limit
    sel.offset = select._offset

    self.stack.pop(-1)

    return sel
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 fromhints=None, compound_index=1, force_result_map=False,
                 nested_join_translation=False, **kwargs):
    """Render ``select`` as SELECT text, tracking DISTINCT and MATCH parts.

    Besides building the SQL string, this method updates compiler state:

    * when a rendered column contains ``"DISTINCT"``, the entries of
      ``self._result_columns`` keyed by that column's alias are rewritten
      to use the name ``"@distinct"``;
    * when the WHERE clause contains match expressions, their operands and
      operators are stored on ``self.left_match``, ``self.right_match``
      and ``self.match_operators``.

    :param select: the select construct to render.
    :param asfrom: forwarded to ``_label_select_column``.
    :param iswrapper: recorded in the entry pushed onto ``self.stack``.
    :param kwargs: forwarded to the ``_compiler_dispatch`` calls and to
        ``order_by_clause``.
    :return: the SQL string (never parenthesized).
    :raises AssertionError: if a DISTINCT column's result entry is being
        renamed while the select has no GROUP BY clauses.

    ``parens``, ``fromhints``, ``compound_index``, ``force_result_map`` and
    ``nested_join_translation`` are accepted but not used here.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    enclosing = self.stack[-1] if self.stack and self.stack[-1] else {}
    outer_froms = enclosing.get('from', None)

    froms = select._get_display_froms(outer_froms)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate
    # to outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    # Render the column list, remembering the alias of a DISTINCT column
    # (the text after the last " AS ") if one shows up.
    labeled_columns = []
    distinct_alias = None
    for column in util.unique_list(select.inner_columns):
        labeled = self._label_select_column(select, column, True, asfrom, {})
        if "DISTINCT" in labeled:
            distinct_alias = labeled.split(" AS ")[-1]
        labeled_columns.append(labeled)

    # Rebuild the result-column list with the DISTINCT alias renamed.
    # NOTE(review): the ``else`` is taken to pair with the outer alias
    # check; the flattened source does not show its nesting -- confirm.
    remapped = []
    if distinct_alias:
        target_name = "@distinct"
        for rc_tuple in self._result_columns:
            if rc_tuple[0] == distinct_alias:
                if rc_tuple[-2][-1] == distinct_alias:
                    renamed = list(rc_tuple)
                    renamed[0] = target_name
                    inner = list(renamed[-2])
                    inner[-1] = target_name
                    if not select._group_by_clause.clauses:
                        raise AssertionError(
                            "Can't query distinct if no group by is selected"
                        )
                    renamed[-2] = tuple(inner)
                    remapped.append(tuple(renamed))
            else:
                remapped.append(rc_tuple)
    if remapped:
        self._result_columns = remapped

    inner_columns = [item for item in labeled_columns if item is not None]

    parts = ["SELECT "]
    parts.append(self.get_select_precolumns(select))
    parts.append(', '.join(inner_columns))
    parts.append(" \nFROM ")
    parts.append(', '.join(
        [source._compiler_dispatch(self, asfrom=True, **kwargs)
         for source in froms]))

    def collect_match_parts(clause):
        """Return (lefts, rights, operators) of match expressions in clause."""
        lefts, rights, operators = [], [], []
        if isinstance(clause.type, MatchType):
            lefts.append(clause.left)
            rights.append(clause.right)
            operators.append(clause.operator)
        elif isinstance(clause, Function):
            # a function named "match" contributes operands but no operator
            if clause.name.lower() == "match":
                func_left, func_right = clause.clauses
                lefts.append(func_left)
                rights.append(func_right)
        elif isinstance(clause, ClauseList):
            for child in clause.clauses:
                child_lefts, child_rights, child_ops = \
                    collect_match_parts(child)
                lefts.extend(child_lefts)
                rights.extend(child_rights)
                operators.extend(child_ops)
        return lefts, rights, operators

    if select._whereclause is not None:
        # Match Clauses must be done in the same compiler
        lefts, rights, operators = collect_match_parts(select._whereclause)
        if lefts and rights:
            self.left_match = tuple(lefts)
            self.right_match = tuple(rights)
            self.match_operators = tuple(operators)
        where_sql = select._whereclause._compiler_dispatch(self, **kwargs)
        if where_sql:
            parts.append(" \nWHERE " + where_sql)

    # NOTE(review): OPTION is emitted before GROUP BY / ORDER BY / LIMIT;
    # SphinxQL appears to expect OPTION after LIMIT -- confirm.
    if hasattr(self, "options_list") and self.options_list:
        parts.append(" OPTION {}".format(", ".join(self.options_list)))

    if select._group_by_clause.clauses:
        parts.append(
            " GROUP BY "
            + select._group_by_clause._compiler_dispatch(self, **kwargs))

    if select._order_by_clause.clauses:
        parts.append(self.order_by_clause(select, **kwargs))
    if select._limit is not None:
        parts.append(self.limit_clause(select))

    self.stack.pop(-1)

    return "".join(parts)
def _run_cache_key_fixture(self, fixture, compare_values):
    """Check the cache keys generated for the elements ``fixture`` returns.

    ``fixture`` is called twice to obtain two sequences, and every index
    pair produced by ``combinations_with_replacement`` is examined:

    * same index: both keys must be non-None, their ``key`` and hash must
      be equal, and the bound parameters must compare pairwise;
    * different index: the keys must differ, or at least one pair of bound
      parameters must fail to compare.

    For ``ClauseElement`` pairs, the bound parameters found by traversing
    each element must also equal the ones recorded on its cache key.

    :param fixture: zero-argument callable returning an indexable sequence
        of objects that provide ``_generate_cache_key()``.
    :param compare_values: forwarded to each bound parameter ``compare()``.
    """
    case_a = fixture()
    case_b = fixture()

    def traversed_bindparams(element):
        found = []
        visitors.traverse_depthfirst(
            element, {}, {"bindparam": found.append}
        )
        return found

    def by_key(params):
        return sorted(params, key=lambda param: param.key)

    index_pairs = itertools.combinations_with_replacement(
        range(len(case_a)), 2
    )
    for a, b in index_pairs:
        elem_a = case_a[a]
        elem_b = case_b[b]
        a_key = elem_a._generate_cache_key()
        b_key = elem_b._generate_cache_key()

        if a == b:
            is_not_(a_key, None)
            is_not_(b_key, None)

            eq_(a_key.key, b_key.key)
            eq_(hash(a_key), hash(b_key))

            for a_param, b_param in zip(a_key.bindparams, b_key.bindparams):
                assert a_param.compare(
                    b_param, compare_values=compare_values
                )
        else:
            keys_equal = a_key.key == b_key.key
            # With unequal keys ne_() simply passes.  With equal keys it is
            # only reached when no bound parameter pair differs, so it
            # fails on purpose: two distinct keys were normally intended
            # and ne_() gives the more descriptive failure.
            if not keys_equal or all(
                a_param.compare(b_param, compare_values=compare_values)
                for a_param, b_param in zip(
                    a_key.bindparams, b_key.bindparams
                )
            ):
                ne_(a_key.key, b_key.key)

        # ClauseElement-specific test to ensure the cache key
        # collected all the bound parameters
        if isinstance(elem_a, ClauseElement) and isinstance(
            elem_b, ClauseElement
        ):
            found_a = traversed_bindparams(elem_a)
            found_b = traversed_bindparams(elem_b)

            # A plain traverse_depthfirst can report the same parameter
            # more than once, e.g. for
            #   select([some_alias]).
            #       select_from(join(some_alias, other_table))
            # with a bound parameter inside some_alias, while the cache
            # key is more minimalistic.  Both sides are therefore
            # de-duplicated and sorted by key before comparing.
            eq_(
                by_key(a_key.bindparams),
                by_key(util.unique_list(found_a)),
            )
            eq_(
                by_key(b_key.bindparams),
                by_key(util.unique_list(found_b)),
            )
def _run_cache_key_fixture(self, fixture, compare_values):
    """Check the cache keys generated for the elements ``fixture`` returns.

    ``fixture`` is called twice to obtain two sequences, and every index
    pair produced by ``combinations_with_replacement`` is examined:

    * same index: the ``key`` values and their hashes must be equal and
      the bound parameters must compare pairwise;
    * different index: the keys must differ, or at least one pair of bound
      parameters must fail to compare.

    An element whose cache key is ``None`` must carry a truthy
    ``"nocache"`` annotation; such pairs are skipped after that check.

    For ``ClauseElement`` pairs, the bound parameters found by traversing
    each element must also equal the ones recorded on its cache key.

    :param fixture: zero-argument callable returning an indexable sequence
        of objects that provide ``_generate_cache_key()``.
    :param compare_values: forwarded to each bound parameter ``compare()``.
    """
    case_a = fixture()
    case_b = fixture()

    def traversed_bindparams(element):
        found = []
        visitors.traverse(element, {}, {"bindparam": found.append})
        return found

    def by_key(params):
        return sorted(params, key=lambda param: param.key)

    index_pairs = itertools.combinations_with_replacement(
        range(len(case_a)), 2)
    for a, b in index_pairs:
        elem_a = case_a[a]
        elem_b = case_b[b]
        a_key = elem_a._generate_cache_key()
        b_key = elem_b._generate_cache_key()

        if a == b:
            if a_key is None:
                assert elem_a._annotations.get("nocache")
                assert b_key is None
                continue

            eq_(a_key.key, b_key.key)
            eq_(hash(a_key.key), hash(b_key.key))

            for a_param, b_param in zip(a_key.bindparams, b_key.bindparams):
                assert a_param.compare(b_param, compare_values=compare_values)
        else:
            if a_key is None or b_key is None:
                if a_key is None:
                    assert elem_a._annotations.get("nocache")
                if b_key is None:
                    assert elem_b._annotations.get("nocache")
                continue

            keys_equal = a_key.key == b_key.key
            # With unequal keys ne_() simply passes.  With equal keys it is
            # only reached when no bound parameter pair differs, so it
            # fails on purpose: two distinct keys were normally intended
            # and ne_() gives the more descriptive failure.
            if not keys_equal or all(
                a_param.compare(b_param, compare_values=compare_values)
                for a_param, b_param in zip(
                    a_key.bindparams, b_key.bindparams)
            ):
                ne_(a_key.key, b_key.key)

        # ClauseElement-specific test to ensure the cache key
        # collected all the bound parameters
        if isinstance(elem_a, ClauseElement) and isinstance(
                elem_b, ClauseElement):
            found_a = traversed_bindparams(elem_a)
            found_b = traversed_bindparams(elem_b)

            # Both sides are sorted by key, and the traversal results are
            # de-duplicated, before being compared.
            eq_(
                by_key(a_key.bindparams),
                by_key(util.unique_list(found_a)),
            )
            eq_(
                by_key(b_key.bindparams),
                by_key(util.unique_list(found_b)),
            )
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 compound_index=1, **kwargs):
    """Render ``select`` as SELECT statement text.

    A ``{'from': ..., 'iswrapper': ...}`` entry is pushed onto
    ``self.stack`` while the statement is rendered and popped again just
    before returning.

    :param select: the select construct to render.
    :param asfrom: forwarded to ``label_select_column``; together with
        ``parens`` it decides whether the result is parenthesized.
    :param parens: when true along with ``asfrom``, the text is wrapped in
        parentheses.
    :param iswrapper: recorded in the pushed stack entry.
    :param compound_index: ``self.result_map`` is handed to column
        rendering when this is 1 and there is no enclosing stack entry,
        or when the enclosing entry is flagged ``iswrapper``.
    :param kwargs: accepted but not used by this implementation.
    :return: the SQL string.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    enclosing = self.stack[-1] if self.stack and self.stack[-1] else {}
    outer_froms = enclosing.get('from', None)

    froms = select._get_display_froms(outer_froms)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate to
    # outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    # ``and`` binds tighter than ``or``; the parentheses only spell that out.
    wants_result_map = (
        (compound_index == 1 and not enclosing)
        or enclosing.get('iswrapper', False)
    )
    column_clause_args = (
        {'result_map': self.result_map} if wants_result_map else {}
    )

    # Render every column first, then drop the None results and let
    # unique_list reduce what is left.
    rendered = [
        self.process(
            self.label_select_column(select, column, asfrom=asfrom),
            within_columns_clause=True,
            **column_clause_args)
        for column in select.inner_columns
    ]
    inner_columns = util.unique_list(
        piece for piece in rendered if piece is not None)

    parts = ["SELECT "]
    if select._prefixes:
        parts.append(
            " ".join(self.process(prefix) for prefix in select._prefixes)
            + " ")
    parts.append(self.get_select_precolumns(select))
    parts.append(', '.join(inner_columns))

    if froms:
        parts.append(" \nFROM ")
        parts.append(
            ', '.join(self.process(source, asfrom=True) for source in froms))
    else:
        parts.append(self.default_from())

    if select._whereclause is not None:
        where_sql = self.process(select._whereclause)
        if where_sql:
            parts.append(" \nWHERE " + where_sql)

    if select._group_by_clause.clauses:
        group_by_sql = self.process(select._group_by_clause)
        if group_by_sql:
            parts.append(" GROUP BY " + group_by_sql)

    if select._having is not None:
        having_sql = self.process(select._having)
        if having_sql:
            parts.append(" \nHAVING " + having_sql)

    if select._order_by_clause.clauses:
        parts.append(self.order_by_clause(select))
    if select._limit is not None or select._offset is not None:
        parts.append(self.limit_clause(select))
    if select.for_update:
        parts.append(self.for_update_clause(select))

    self.stack.pop(-1)

    text = "".join(parts)
    if asfrom and parens:
        return "(" + text + ")"
    return text
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 fromhints=None, compound_index=1, force_result_map=False,
                 nested_join_translation=False, **kwargs):
    """Render ``select`` as SELECT statement text.

    The statement is assembled in this order: hint text, prefixes,
    pre-column text, columns, FROM (or ``default_from()``), WHERE,
    GROUP BY, HAVING, ORDER BY, within-group ORDER BY, LIMIT, options and
    FOR UPDATE.  A ``{'from': ..., 'iswrapper': ...}`` entry sits on
    ``self.stack`` while this happens.

    :param select: the select construct to render.
    :param asfrom: forwarded to ``_label_select_column``; together with
        ``parens`` it decides whether the result is parenthesized.
    :param parens: when true along with ``asfrom``, the text is wrapped in
        parentheses.
    :param iswrapper: recorded in the pushed stack entry.
    :param kwargs: forwarded to the ``_compiler_dispatch`` calls and to
        the order-by, within-group and options clause helpers.
    :return: the SQL string.

    ``fromhints``, ``compound_index``, ``force_result_map`` and
    ``nested_join_translation`` are accepted but not used here.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    entry = self.stack[-1] if self.stack and self.stack[-1] else {}
    existingfroms = entry.get('from', None)

    froms = select._get_display_froms(existingfroms)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate
    # to outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    # the actual list of columns to print in the SELECT column list.
    labeled = [
        self._label_select_column(select, co, True, asfrom, {})
        for co in util.unique_list(select.inner_columns)
    ]
    inner_columns = [c for c in labeled if c is not None]

    parts = ["SELECT "]  # we're off to a good start !

    if select._hints:
        # items() instead of iteritems(): the latter does not exist on
        # Python 3 dicts, while items() works on both 2 and 3.
        byfrom = dict(
            (from_, hinttext % {
                'name': from_._compiler_dispatch(self, ashint=True)
            })
            for (from_, dialect), hinttext in select._hints.items()
            if dialect in ('*', self.dialect.name)
        )
        hint_text = self.get_select_hint_text(byfrom)
        if hint_text:
            parts.append(hint_text + " ")

    if select._prefixes:
        parts.append(
            " ".join(
                x._compiler_dispatch(self, **kwargs)
                for x in select._prefixes)
            + " ")
    parts.append(self.get_select_precolumns(select))
    parts.append(', '.join(inner_columns))

    if froms:
        parts.append(" \nFROM ")
        if select._hints:
            # per-FROM hint text is only passed along when hints exist
            parts.append(', '.join([
                f._compiler_dispatch(
                    self, asfrom=True, fromhints=byfrom, **kwargs)
                for f in froms
            ]))
        else:
            parts.append(', '.join([
                f._compiler_dispatch(self, asfrom=True, **kwargs)
                for f in froms
            ]))
    else:
        parts.append(self.default_from())

    if select._whereclause is not None:
        t = select._whereclause._compiler_dispatch(self, **kwargs)
        if t:
            parts.append(" \nWHERE " + t)

    if select._group_by_clause.clauses:
        group_by = select._group_by_clause._compiler_dispatch(
            self, **kwargs)
        if group_by:
            parts.append(" GROUP BY " + group_by)

    if select._having is not None:
        t = select._having._compiler_dispatch(self, **kwargs)
        if t:
            parts.append(" \nHAVING " + t)

    if select._order_by_clause.clauses:
        parts.append(self.order_by_clause(select, **kwargs))

    # Optional attributes: the select may not define them at all.
    if getattr(select, "_within_group_order_by_clause", None) is not None:
        if select._within_group_order_by_clause.clauses:
            parts.append(
                self.within_group_order_by_clause(select, **kwargs))

    if select._limit is not None:
        parts.append(self.limit_clause(select))

    if getattr(select, "_options", None) is not None:
        if select._options.options:
            parts.append(self.options_clause(select, **kwargs))

    if select.for_update:
        parts.append(self.for_update_clause(select))

    self.stack.pop(-1)

    text = "".join(parts)
    if asfrom and parens:
        return "(" + text + ")"
    return text
def dedupe_tuple(tup: Tuple[str, ...]) -> Tuple[str, ...]:
    """Return ``tup`` de-duplicated through ``unique_list``, as a tuple.

    The surviving elements and their order are whatever ``unique_list``
    yields for the input.
    """
    deduped = unique_list(tup)
    return tuple(deduped)
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 fromhints=None, compound_index=1, **kwargs):
    """Render ``select`` as SELECT statement text.

    The statement is assembled in this order: hint text, prefixes,
    pre-column text, columns, FROM (or ``default_from()``), WHERE,
    GROUP BY, HAVING, ORDER BY, within-group ORDER BY, LIMIT, options and
    FOR UPDATE.  A ``{'from': ..., 'iswrapper': ...}`` entry sits on
    ``self.stack`` while this happens.

    :param select: the select construct to render.
    :param asfrom: forwarded to ``label_select_column``; together with
        ``parens`` it decides whether the result is parenthesized.
    :param parens: when true along with ``asfrom``, the text is wrapped in
        parentheses.
    :param iswrapper: recorded in the pushed stack entry.
    :param compound_index: ``self.result_map`` is handed to column
        rendering when this is 1 and there is no enclosing stack entry,
        or when the enclosing entry is flagged ``iswrapper``.
    :param kwargs: forwarded to the ``_compiler_dispatch`` calls (except
        column rendering) and to the order-by, within-group and options
        clause helpers.
    :return: the SQL string.

    ``fromhints`` is accepted but not used here.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    entry = self.stack[-1] if self.stack and self.stack[-1] else {}
    existingfroms = entry.get('from', None)

    froms = select._get_display_froms(existingfroms)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate
    # to outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    # ``and`` binds tighter than ``or``; the parentheses only spell that out.
    if (compound_index == 1 and not entry) or entry.get('iswrapper', False):
        column_clause_args = {'result_map': self.result_map}
    else:
        column_clause_args = {}

    # the actual list of columns to print in the SELECT column list.
    rendered = [
        self.label_select_column(select, co, asfrom=asfrom)
        ._compiler_dispatch(
            self, within_columns_clause=True, **column_clause_args)
        for co in util.unique_list(select.inner_columns)
    ]
    inner_columns = [c for c in rendered if c is not None]

    parts = ["SELECT "]  # we're off to a good start !

    if select._hints:
        # items() instead of iteritems(): the latter does not exist on
        # Python 3 dicts, while items() works on both 2 and 3.
        byfrom = dict(
            (from_, hinttext % {
                'name': from_._compiler_dispatch(self, ashint=True)
            })
            for (from_, dialect), hinttext in select._hints.items()
            if dialect in ('*', self.dialect.name)
        )
        hint_text = self.get_select_hint_text(byfrom)
        if hint_text:
            parts.append(hint_text + " ")

    if select._prefixes:
        parts.append(
            " ".join(
                x._compiler_dispatch(self, **kwargs)
                for x in select._prefixes)
            + " ")
    parts.append(self.get_select_precolumns(select))
    parts.append(', '.join(inner_columns))

    if froms:
        parts.append(" \nFROM ")
        if select._hints:
            # per-FROM hint text is only passed along when hints exist
            parts.append(', '.join([
                f._compiler_dispatch(
                    self, asfrom=True, fromhints=byfrom, **kwargs)
                for f in froms
            ]))
        else:
            parts.append(', '.join([
                f._compiler_dispatch(self, asfrom=True, **kwargs)
                for f in froms
            ]))
    else:
        parts.append(self.default_from())

    if select._whereclause is not None:
        t = select._whereclause._compiler_dispatch(self, **kwargs)
        if t:
            parts.append(" \nWHERE " + t)

    if select._group_by_clause.clauses:
        group_by = select._group_by_clause._compiler_dispatch(
            self, **kwargs)
        if group_by:
            parts.append(" GROUP BY " + group_by)

    if select._having is not None:
        t = select._having._compiler_dispatch(self, **kwargs)
        if t:
            parts.append(" \nHAVING " + t)

    if select._order_by_clause.clauses:
        parts.append(self.order_by_clause(select, **kwargs))

    # Optional attributes: the select may not define them at all.
    if getattr(select, "_within_group_order_by_clause", None) is not None:
        if select._within_group_order_by_clause.clauses:
            parts.append(
                self.within_group_order_by_clause(select, **kwargs))

    if select._limit is not None:
        parts.append(self.limit_clause(select))

    if getattr(select, "_options", None) is not None:
        if select._options.options:
            parts.append(self.options_clause(select, **kwargs))

    if select.for_update:
        parts.append(self.for_update_clause(select))

    self.stack.pop(-1)

    text = "".join(parts)
    if asfrom and parens:
        return "(" + text + ")"
    return text
def visit_select(self, select, asfrom=False, parens=True, iswrapper=False,
                 fromhints=None, compound_index=1, force_result_map=False,
                 nested_join_translation=False, **kwargs):
    """Render ``select`` as SELECT text, tracking DISTINCT and MATCH parts.

    Besides building the SQL string, this method updates compiler state:

    * when a rendered column contains ``"DISTINCT"``, the entries of
      ``self._result_columns`` keyed by that column's alias are rewritten
      to use the name ``"@distinct"``;
    * when the WHERE clause contains match expressions, their operands and
      operators are stored on ``self.left_match``, ``self.right_match``
      and ``self.match_operators``.

    :param select: the select construct to render.
    :param asfrom: forwarded to ``_label_select_column``.
    :param iswrapper: recorded in the entry pushed onto ``self.stack``.
    :param kwargs: forwarded to the ``_compiler_dispatch`` calls and to
        ``order_by_clause``.
    :return: the SQL string (never parenthesized).
    :raises AssertionError: if a DISTINCT column's result entry is being
        renamed while the select has no GROUP BY clauses.

    ``parens``, ``fromhints``, ``compound_index``, ``force_result_map`` and
    ``nested_join_translation`` are accepted but not used here.
    """
    # Same result as ``self.stack and self.stack[-1] or {}``.
    enclosing = self.stack[-1] if self.stack and self.stack[-1] else {}
    outer_froms = enclosing.get('from', None)

    froms = select._get_display_froms(outer_froms)
    correlate_froms = set(sql._from_objects(*froms))

    # TODO: might want to propagate existing froms for
    # select(select(select)) where innermost select should correlate
    # to outermost:
    #   if existingfroms:
    #       correlate_froms = correlate_froms.union(existingfroms)

    self.stack.append({'from': correlate_froms, 'iswrapper': iswrapper})

    # Render the column list, remembering the alias of a DISTINCT column
    # (the text after the last " AS ") if one shows up.
    labeled_columns = []
    distinct_alias = None
    for column in util.unique_list(select.inner_columns):
        labeled = self._label_select_column(select, column, True, asfrom, {})
        if "DISTINCT" in labeled:
            distinct_alias = labeled.split(" AS ")[-1]
        labeled_columns.append(labeled)

    # Rebuild the result-column list with the DISTINCT alias renamed.
    # NOTE(review): the ``else`` is taken to pair with the outer alias
    # check; the flattened source does not show its nesting -- confirm.
    remapped = []
    if distinct_alias:
        target_name = "@distinct"
        for rc_tuple in self._result_columns:
            if rc_tuple[0] == distinct_alias:
                if rc_tuple[-2][-1] == distinct_alias:
                    renamed = list(rc_tuple)
                    renamed[0] = target_name
                    inner = list(renamed[-2])
                    inner[-1] = target_name
                    if not select._group_by_clause.clauses:
                        raise AssertionError("Can't query distinct if no group by is selected")
                    renamed[-2] = tuple(inner)
                    remapped.append(tuple(renamed))
            else:
                remapped.append(rc_tuple)
    if remapped:
        self._result_columns = remapped

    inner_columns = [item for item in labeled_columns if item is not None]

    parts = ["SELECT "]
    parts.append(self.get_select_precolumns(select))
    parts.append(', '.join(inner_columns))
    parts.append(" \nFROM ")
    parts.append(', '.join(
        [source._compiler_dispatch(self, asfrom=True, **kwargs)
         for source in froms]))

    def collect_match_parts(clause):
        """Return (lefts, rights, operators) of match expressions in clause."""
        lefts, rights, operators = [], [], []
        if isinstance(clause.type, MatchType):
            lefts.append(clause.left)
            rights.append(clause.right)
            operators.append(clause.operator)
        elif isinstance(clause, Function):
            # a function named "match" contributes operands but no operator
            if clause.name.lower() == "match":
                func_left, func_right = clause.clauses
                lefts.append(func_left)
                rights.append(func_right)
        elif isinstance(clause, ClauseList):
            for child in clause.clauses:
                child_lefts, child_rights, child_ops = \
                    collect_match_parts(child)
                lefts.extend(child_lefts)
                rights.extend(child_rights)
                operators.extend(child_ops)
        return lefts, rights, operators

    if select._whereclause is not None:
        # Match Clauses must be done in the same compiler
        lefts, rights, operators = collect_match_parts(select._whereclause)
        if lefts and rights:
            self.left_match = tuple(lefts)
            self.right_match = tuple(rights)
            self.match_operators = tuple(operators)
        where_sql = select._whereclause._compiler_dispatch(self, **kwargs)
        if where_sql:
            parts.append(" \nWHERE " + where_sql)

    # NOTE(review): OPTION is emitted before GROUP BY / ORDER BY / LIMIT;
    # SphinxQL appears to expect OPTION after LIMIT -- confirm.
    if hasattr(self, "options_list") and self.options_list:
        parts.append(" OPTION {}".format(", ".join(self.options_list)))

    if select._group_by_clause.clauses:
        parts.append(
            " GROUP BY "
            + select._group_by_clause._compiler_dispatch(self, **kwargs))

    if select._order_by_clause.clauses:
        parts.append(self.order_by_clause(select, **kwargs))
    if select._limit is not None:
        parts.append(self.limit_clause(select))

    self.stack.pop(-1)

    return "".join(parts)