def parse(self, response):
    """Parse a crawled Liepin page.

    If the current URL matches a detail-page rule, scrape the job posting
    into a LiepinItem and yield it.  Then, regardless of page type, follow
    every on-page link that matches a list-page or detail-page rule,
    re-entering this callback for each.
    """
    sel = Selector(response)
    current_url = response.url
    # Detail-page analysis
    for detail_link in CONFIG['detail_link_rule']:
        if is_match(current_url, detail_link):
            item = LiepinItem()
            # Fields receive the raw extract() list (no indexing) to avoid
            # "index out of range" when a selector matches nothing; the
            # trailing "# [i]" comments record the element consumers want.
            item['url'] = current_url
            item['name'] = sel.xpath('//div[@class="title-info "]/h1/text()').extract()  # [0]
            item['company_name'] = sel.xpath('//div[@class="title-info "]/h3/a/text()').extract()  # [0]
            item['company_size'] = sel.xpath('//div[@class="right-post-top"]/div/text()').extract()  # [4]
            item['company_address'] = sel.xpath('//div[@class="right-post-top"]/div/text()').extract()  # [-1]
            item['company_type'] = sel.xpath('//div[@class="right-post-top"]/div/text()').extract()  # [-3]
            item['pay'] = sel.xpath('//p[@class="job-main-title"]/text()').extract()  # [0]
            item['publish_time'] = sel.xpath('//p[@class="basic-infor"]/span[2]/text()').extract()  # [-1]
            item['requires'] = sel.xpath('//div[@class="resume clearfix"]/span/text()').extract()
            yield item
    # Filter out all list pages and detail pages on this page and schedule
    # them as callbacks into this same method.
    for url in sel.xpath('//a/@href').extract():
        url = urljoin(current_url, url)  # resolve relative hrefs
        for list_link in CONFIG['list_link_rule']:
            if is_match(url, list_link):
                # log.msg('list_url: %s' % url, level=log.INFO)
                yield Request(url, callback=self.parse)
        for detail_link in CONFIG['detail_link_rule']:
            if is_match(url, detail_link):
                # log.msg('detail_url: %s' % url, level=log.INFO)
                yield Request(url, callback=self.parse)
def _match_song(self, song): query = f'{song.title} {song.artist}' results = self._api.search(query) hits = [song_hit['track'] for song_hit in results.get('song_hits')] # Check if song is a match for hit in hits: if is_match(hit['title'], song.title) and is_match( hit['artist'], song.artist): return hit['storeId'] return None
def search_topic(args):
    """Return SearchResult entries for LeetCode topics matching the query.

    When no topic matches, a single fallback result pointing at a plain
    LeetCode search for the query is returned instead.
    """
    query = args["query"]
    # Pre-render the optional difficulty fragments once, up front.
    difficulty_api = "difficulty=" + args["difficulty"] if args["difficulty"] else ""
    difficulty = "[%s]" % args["difficulty"] if args["difficulty"] else ""
    results = []
    for topic_key, topic_info in LC_TOPICS.items():
        if not is_match(query, topic_key):
            continue
        results.append(SearchResult(
            "%s %s" % (topic_info[0], difficulty),
            "Search LeetCode Topic '%s' %s" % (topic_info[0], difficulty),
            "{0}?topicSlugs={1}&{2}".format(LEETCODE_URL, topic_info[1],
                                            difficulty_api),
        ))
    # Fallback: no topic matched, offer a plain search instead.
    if not results:
        results.append(SearchResult(
            query,
            "Search LeetCode for '%s'" % query,
            "{0}?search={1}".format(LEETCODE_URL, query),
        ))
    return results
def test_is_match_same(self):
    """is_match() should report two identical strings as matching."""
    self.assertEqual(utils.is_match('1', "1"), True)
def find(self, prop, query, output_prop) -> "str | list":
    """Search self.data for entries whose *prop* value matches *query*.

    :param prop: key to look up in each data entry.
    :param query: value to match (via utils.is_match); falsy means no search.
    :param output_prop: key whose value is extracted from matching entries.
    :return: "" when query is falsy or nothing matches; the single entry's
        *output_prop* value when exactly one entry matches; otherwise a
        list of *output_prop* values from all matching entries.
    """
    # Guard clause: an empty query never matches anything.
    if not query:
        return ""
    # Entries must contain the key before matching is attempted.
    matches = [entry for entry in self.data
               if prop in entry and utils.is_match(entry[prop], query)]
    # (Removed a leftover debug print of the single-match result.)
    if not matches:
        return ""
    if len(matches) == 1:
        return matches[0][output_prop]
    return [entry[output_prop] for entry in matches]
def match_song(artist, track):
    """
    Performs a Google Music search for the given artist and track. It then
    returns the first song search result that exceeds a certain threshold
    for fuzzy string matching on the artist and track.

    :param artist: A string representing the name of the artist that should
                   be searched for and matched against.
    :param track: A string representing the track name that should be
                  searched for and matched against.
    :return: Either the Google Music store listing id for the first eligible
             search result, or None when no appropriate tracks are found.
    """
    search_results = gmusic.search(track + " " + artist)
    for result in search_results['song_hits']:
        candidate = result['track']
        title_ok = is_match(candidate['title'], track)
        artist_ok = is_match(candidate['artist'], artist)
        if title_ok and artist_ok:
            return candidate['storeId']
    return None
def plain_search_match(search):
    """
    Performs a Google Music search for the given plaintext query.

    :return: Either the Google Music store listing id for the first eligible
             search result, or None when no appropriate tracks are found.
    """
    for result in gmusic.search(search)['song_hits']:
        candidate = result['track']
        if is_match(candidate['title'], search):
            return candidate['storeId']
    return None
def TarIt(source, destDir, excludes=None):
    """
    Take a directory, tar it, and put it in the destination folder.

    The archive name is generated from the hostname, the source path with
    special characters replaced by underscores, and a timestamp.  Entries
    matching *excludes* (per utils.is_match) or that cannot be read are
    skipped.  On success, returns the name of the archive created.

    :param source: directory (or file) to archive
    :param destDir: directory that receives the archive
    :param excludes: optional list of patterns to skip (default: none)
    """
    # Bug fix: use a None default instead of a mutable [] shared between calls.
    if excludes is None:
        excludes = []
    now = datetime.datetime.now()
    # NOTE(review): '%s' (lowercase) is a non-portable strftime code (epoch
    # seconds on glibc) — kept as-is; confirm '%S' was not intended.
    destFile = os.path.join(
        destDir,
        socket.gethostname() + '-'
        + source.replace('/', '_').replace(' ', '_') + '-'
        + now.strftime('%Y%m%d-%s') + '.tar.gz')

    def _exclude_filter(tarinfo):
        # Skip entries we were told to exclude, or that we cannot read.
        # tarfile's old ``exclude=`` callback was removed in Python 3.7;
        # ``filter=`` receives a TarInfo and returns None to drop the entry.
        # NOTE(review): the filter sees archive-relative names rather than
        # filesystem paths — confirm utils.is_match/can_read accept these.
        if utils.is_match(tarinfo.name, excludes) or not utils.can_read(tarinfo.name):
            return None
        return tarinfo

    # Context manager guarantees the archive is flushed/closed even on error.
    with tarfile.open(destFile, 'w:gz') as out:
        if excludes:
            out.add(source, recursive=True, filter=_exclude_filter)
        else:
            out.add(source, recursive=True)
    print('Successfully created archive %s' % destFile)
    return destFile
# Ensure the output folder exists (create it on first run).
output_folder = os.path.join(path, output)
if not output_folder in folders:
    try:
        os.mkdir(output_folder)
    except FileExistsError as err:
        print(f"Folder already exists {output_folder}... {err}")
# Files already matched/moved, and a counter for reporting.
checked_files = []
moved_files_counter = 0
# go through all files
for doc in docs:
    print(f"> Looking for {doc.name}")
    # Collect every document that matches this one and was not yet handled.
    matches = [
        file
        for file in docs
        if is_match(doc, file) and file not in checked_files
    ]
    checked_files.append(doc)
    if len(matches) == 0:
        print("Not matches")
        continue
    for match in matches:
        checked_files.append(match)
        new_doc_path = os.path.join(output_folder, match.name) + doc.filetype
        # NOTE(review): this renames doc.full_path on EVERY iteration; with
        # more than one match the second rename will fail because the source
        # was already moved — presumably match.full_path was intended. Confirm.
        os.rename(doc.full_path, new_doc_path)
        moved_files_counter += 1
        print(f"\tMoved file {match.name} to {output}")
print(f"Moved {moved_files_counter} of {len(docs)} elements")
                   help='Cuts common to both signal & background')
group.add_argument('--cuts_bkg', action='store_true',
                   help='Background only cuts')
group.add_argument('--cuts_sig', action='store_true',
                   help='Signal only cuts')
opts = optparser.parse_args()

import sys
import os
# Bail out early when the config file does not exist.
if not os.path.exists(opts.conf):
    sys.exit('File not found: {}'.format(opts.conf))

from tmvaconfig import ConfigFile
conf = ConfigFile(opts.conf)
n = conf.read()
# filter out when not a match or invalid
sessions = filter(lambda s: is_match(s, opts.sessions), conf.sessions())
if not sessions:
    sys.exit('No matching sessions!')
# NOTE(review): `print` statements below are Python 2 syntax; under Python 3
# `filter` would also return an always-truthy iterator, defeating the empty
# check above — this chunk appears to target Python 2 only.
print ' '.join(sessions)
# Nothing more to show unless at least one detail flag was requested.
if not (opts.cvars or opts.nvars or opts.cuts_both or opts.cuts_sig
        or opts.cuts_bkg):
    sys.exit(0)
# helper
one_per_line = lambda i: '\n'.join(i)
for session in sessions:
    config = conf.get_session_config(session)
    print '\n{}:'.format(config._name)
def test_is_match_bool(self):
    """is_match() should match the string "true" against boolean True."""
    self.assertEqual(utils.is_match("true", True), True)
def test_is_match_mix(self):
    """is_match() should match the integer 1 against the string "1"."""
    self.assertEqual(utils.is_match(1, "1"), True)
                   help='Background only cuts')
group.add_argument('--cuts_sig', action='store_true',
                   help='Signal only cuts')
opts = optparser.parse_args()

import sys
import os
# Bail out early when the config file does not exist.
if not os.path.exists(opts.conf):
    sys.exit('File not found: {}'.format(opts.conf))

from tmvaconfig import ConfigFile
conf = ConfigFile(opts.conf)
n = conf.read()
# filter out when not a match or invalid
sessions = filter(lambda s: is_match(s, opts.sessions), conf.sessions())
if not sessions:
    sys.exit('No matching sessions!')
# NOTE(review): `print` statements below are Python 2 syntax; under Python 3
# `filter` would also return an always-truthy iterator, defeating the empty
# check above — this chunk appears to target Python 2 only.
print ' '.join(sessions)
# Nothing more to show unless at least one detail flag was requested.
if not (opts.cvars or opts.nvars or opts.cuts_both or opts.cuts_sig
        or opts.cuts_bkg):
    sys.exit(0)
# helper
one_per_line = lambda i: '\n'.join(i)
for session in sessions:
    config = conf.get_session_config(session)
    print '\n{}:'.format(config._name)
# Alphabetically order the type declarations of a TypeScript source file.
parser = argparse.ArgumentParser(
    description="Script that orders alphabetically a typescript file with types on it")
parser.add_argument("--file", type=str,
                    help="File containing the typescript's types")
parser.add_argument("--output", type=str, default="orderTypes.ts",
                    help="File containing the ordered types")
args = parser.parse_args()

file_name = args.file
output_file_name = args.output

list_types = []
# Parse the input: a matching line starts a new type declaration, and any
# other non-blank line is a property of the most recent type.
# Bug fix: use context managers so both files are reliably closed.
with open(file_name, "r") as original_file:
    for line in original_file:
        if line == "\n":
            continue
        if is_match(line):
            list_types.append(TypescriptModel(line))
        else:
            # NOTE(review): raises IndexError when a property precedes the
            # first type line — malformed input; kept as a hard failure.
            list_types[-1].push_property(line)

list_types.sort(key=lambda x: x.name)  # order the list of objects by name

# Write the sorted types, one blank separator line after each.
with open(output_file_name, "w+") as output_file:
    for model in list_types:
        output_file.write(model.name)
        for prop in model.properties:
            output_file.write(prop)
        output_file.write(os.linesep)

print(f"Done! {output_file_name} created.")