def main(args):
    """Write the core lines of the archive at ``args.source_file`` to a shapefile.

    The source is opened with ``DwCAReader``. If ``valid_dwca`` rejects it,
    nothing is written. Otherwise every line yielded by ``dwca.each_line()``
    is converted with ``dwcaline_to_epsg4326`` and inserted into a
    ``ShapefileOutput`` together with the ASCII form of each core term value.
    A ``.`` is written to stdout for each inserted line.

    :param args: object exposing ``source_file``, ``destination`` and ``crs``
        (presumably the parsed command-line namespace -- confirm against caller).
    :return: None
    """
    with DwCAReader(args.source_file) as dwca:
        if not valid_dwca(dwca):
            return

        ordered_fields = list(dwca.core_terms)

        # Only the last '/'-separated part is kept, as Shapefile field names
        # are limited to 10 chars. Note that no explicit 10-char cut is made
        # here; only the leading path/URI portion is dropped.
        ordered_fields_truncated = [f.rsplit('/')[-1] for f in ordered_fields]

        # The leftover interactive `pdb.set_trace()` breakpoint that used to
        # sit here was removed: it stopped every run at a debugger prompt.

        with ShapefileOutput(args.destination, args.crs, ordered_fields_truncated) as out:
            for line in dwca.each_line():
                try:
                    gis_data = dwcaline_to_epsg4326(line)
                    additional_values = [
                        unicode_to_ascii(line.data[f]) for f in ordered_fields
                    ]
                    out.insert_line(gis_data['lat'], gis_data['lon'], additional_values)
                    # Progress indicator: one dot per line written.
                    sys.stdout.write('.')
                except CannotConvertException:
                    # Best-effort export: lines that cannot be converted are
                    # skipped without any output.
                    pass
def main(args):
    """Convert the archive named by ``args.source_file`` into a shapefile.

    Steps, in order:

    1. Open the source with ``DwCAReader`` and stop if ``valid_dwca`` returns
       a falsy value.
    2. Derive the output field names from ``dwca.core_terms``.
    3. For each line from ``dwca.each_line()``, obtain its ``lat``/``lon``
       via ``dwcaline_to_epsg4326`` and insert it into a ``ShapefileOutput``
       along with the ``unicode_to_ascii`` form of every core term value.

    :param args: object with ``source_file``, ``destination`` and ``crs``
        attributes (assumed to be the parsed CLI arguments -- TODO confirm).
    :return: None
    """
    with DwCAReader(args.source_file) as dwca:
        if not valid_dwca(dwca):
            return

        ordered_fields = list(dwca.core_terms)

        # Only last part as Shapefiles field names are limited to 10 chars.
        # (This keeps the text after the final '/'; it does not itself
        # enforce the 10-character limit.)
        ordered_fields_truncated = [f.rsplit('/')[-1] for f in ordered_fields]

        # A debugging breakpoint (`import pdb; pdb.set_trace()`) was removed
        # from this spot so the conversion can run unattended.

        with ShapefileOutput(args.destination, args.crs, ordered_fields_truncated) as out:
            for line in dwca.each_line():
                try:
                    gis_data = dwcaline_to_epsg4326(line)
                    additional_values = [
                        unicode_to_ascii(line.data[f]) for f in ordered_fields
                    ]
                    out.insert_line(gis_data['lat'], gis_data['lon'], additional_values)
                    # One dot on stdout per successfully inserted line.
                    sys.stdout.write('.')
                except CannotConvertException:
                    # Deliberately ignored: unconvertible lines are left out
                    # of the shapefile.
                    pass
def preprocess(tensor: tf.Tensor) -> str:
    """
    Clean one text sequence and wrap it in start/end markers.

    :param tensor: Eager tf.string tensor (its .numpy() value is decoded as UTF-8)
    :return: str of the form '<start> ... <end>' holding the cleaned sequence
    """
    decoded = tensor.numpy().decode('UTF-8')

    # Lower-case and trim first, then hand over to unicode_to_ascii
    cleaned = unicode_to_ascii(decoded.lower().strip())

    # Regex passes, applied in this exact order
    substitutions = (
        # Surround each listed punctuation mark with spaces
        # eg: "he is a boy." => "he is a boy . "
        (r"([?.!,¿])", r" \1 "),
        # Squeeze any run of spaces and/or double quotes into one space
        (r'[" "]+', " "),
        # Every run of characters other than a-z, A-Z, "?", ".", "!", ",", "¿"
        # becomes a single space
        (r"[^a-zA-Z?.!,¿]+", " "),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # strip() trims both ends, which covers the trailing-whitespace removal too
    cleaned = cleaned.strip()

    return '<start> ' + cleaned + ' <end>'
def lookup(bot, event, *args):
    """find keywords in a specified spreadsheet"""
    # NOTE(review): the docstring above is left verbatim in case the command
    # framework surfaces it as help text -- confirm before extending it.
    #
    # Purpose: download the HTML at the conversation's configured
    # 'spreadsheet_url', collect the non-empty <td> texts of every <tr> in the
    # table with class "waffle", and report the rows in which any cell
    # contains the keyword (compared lower-cased, both raw and after
    # unicode_to_ascii).
    #
    # Arguments (*args): the keyword words. If the first word is '<N' with N
    # an integer, N is the maximum number of rows displayed and the remaining
    # words form the keyword; otherwise at most 5 rows are displayed.
    #
    # Returns: the message text (str).
    #
    # Raises: whatever urllib.request.urlopen raises for an unreachable URL,
    # and UnicodeDecodeError if the page is not valid UTF-8.
    if not bot.get_config_suboption(event.conv_id, 'spreadsheet_enabled'):
        return _("Spreadsheet function disabled")

    spreadsheet_url = bot.get_config_suboption(event.conv_id, 'spreadsheet_url')
    if not spreadsheet_url:
        return _("Spreadsheet URL not set")

    # Without this guard `args[0]` below raised IndexError on a bare command.
    if not args:
        return _("No keyword specified")

    table_class = "waffle"  # Name of table class to search. Note that 'waffle' seems to be the default for all spreadsheets
    default_max = 5

    counter_max = default_max  # Maximum rows displayed per query
    keyword = ' '.join(args)
    if args[0].startswith('<'):
        try:
            counter_max = int(args[0][1:])
        except ValueError:
            # Not a row limit after all (e.g. a keyword that merely starts
            # with '<'): keep the default limit and search the whole input.
            pass
        else:
            keyword = ' '.join(args[1:])

    message_parts = [_('Results for keyword <b>{}</b>:\n').format(keyword)]

    logger.debug("{0} ({1}) has requested to lookup '{2}'".format(
        event.user.full_name, event.user.id_.chat_id, keyword))

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(spreadsheet_url) as response:
        html = response.read()

    keyword_raw = keyword.strip().lower()
    keyword_ascii = unicode_to_ascii(keyword_raw)

    # Adapted from http://stackoverflow.com/questions/23377533/python-beautifulsoup-parsing-table
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(str(html, 'utf-8'), 'html.parser')
    table = soup.find('table', attrs={'class': table_class})
    # find() returns None when nothing matches; treat a missing table or
    # tbody as "no rows" rather than failing with AttributeError.
    table_body = table.find('tbody') if table is not None else None
    rows = table_body.find_all('tr') if table_body is not None else []

    data = []
    for row in rows:
        cell_texts = [ele.text.strip() for ele in row.find_all('td')]
        data.append([text for text in cell_texts if text])  # Get rid of empty values

    counter = 0  # number of matching rows (displayed or not)
    for row in data:
        for cell in row:
            cellcontent_raw = str(cell).lower().strip()
            cellcontent_ascii = unicode_to_ascii(cellcontent_raw)

            if keyword_raw in cellcontent_raw or keyword_ascii in cellcontent_ascii:
                if counter < counter_max:
                    message_parts.append(_('\nRow {}: ').format(counter + 1))
                    message_parts.extend('{} | '.format(datapoint) for datapoint in row)
                    message_parts.append('\n')
                # Beyond the limit the row is only counted, to avoid an
                # over-long message. Either way stop at the first matching
                # cell so each row is counted (and shown) exactly once;
                # previously rows past the limit were counted once per
                # matching cell, inflating the total below.
                counter += 1
                break

    if counter > counter_max:
        message_parts.append(
            _('\n{0} rows found. Only returning first {1}.').format(counter, counter_max))
        if counter_max == default_max:
            message_parts.append(
                _('\nHint: Use <b>/bot lookup <{0} {1}</b> to view {0} rows').format(counter_max*2, keyword))

    if counter == 0:
        message_parts.append(_('No match found'))

    return ''.join(message_parts)
def lookup(bot, event, *args):
    """find keywords in a specified spreadsheet"""
    # NOTE(review): the docstring above is left verbatim in case the command
    # framework surfaces it as help text -- confirm before extending it.
    #
    # Purpose: download the HTML at the conversation's configured
    # 'spreadsheet_url', collect the non-empty <td> texts of every <tr> in the
    # table with class "waffle", and send to event.conv the rows in which any
    # cell contains the keyword (compared lower-cased, both raw and after
    # unicode_to_ascii).
    #
    # Arguments (*args): the keyword words. If the first word is '<N' with N
    # an integer, N is the maximum number of rows displayed and the remaining
    # words form the keyword; otherwise at most 5 rows are displayed.
    #
    # This is a generator-based coroutine: every outcome is delivered through
    # `yield from bot.coro_send_message(...)`; nothing is returned.
    #
    # Raises: whatever urllib.request.urlopen raises for an unreachable URL,
    # and UnicodeDecodeError if the page is not valid UTF-8.
    #
    # NOTE(review): urlopen is a blocking call; if this runs on an event
    # loop it will stall it while the page downloads -- confirm acceptable.
    if not bot.get_config_suboption(event.conv_id, 'spreadsheet_enabled'):
        yield from bot.coro_send_message(event.conv, _("Spreadsheet function disabled"))
        return

    spreadsheet_url = bot.get_config_suboption(event.conv_id, 'spreadsheet_url')
    if not spreadsheet_url:
        yield from bot.coro_send_message(event.conv, _("Spreadsheet URL not set"))
        return

    # Without this guard `args[0]` below raised IndexError on a bare command.
    if not args:
        yield from bot.coro_send_message(event.conv, _("No keyword specified"))
        return

    table_class = "waffle"  # Name of table class to search. Note that 'waffle' seems to be the default for all spreadsheets
    default_max = 5

    counter_max = default_max  # Maximum rows displayed per query
    keyword = ' '.join(args)
    if args[0].startswith('<'):
        try:
            counter_max = int(args[0][1:])
        except ValueError:
            # Not a row limit after all (e.g. a keyword that merely starts
            # with '<'): keep the default limit and search the whole input.
            pass
        else:
            keyword = ' '.join(args[1:])

    message_parts = [_('Results for keyword <b>{}</b>:<br />').format(keyword)]

    logger.debug("{0} ({1}) has requested to lookup '{2}'".format(
        event.user.full_name, event.user.id_.chat_id, keyword))

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(spreadsheet_url) as response:
        html = response.read()

    keyword_raw = keyword.strip().lower()
    keyword_ascii = unicode_to_ascii(keyword_raw)

    # Adapted from http://stackoverflow.com/questions/23377533/python-beautifulsoup-parsing-table
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(str(html, 'utf-8'), 'html.parser')
    table = soup.find('table', attrs={'class': table_class})
    # find() returns None when nothing matches; treat a missing table or
    # tbody as "no rows" rather than failing with AttributeError.
    table_body = table.find('tbody') if table is not None else None
    rows = table_body.find_all('tr') if table_body is not None else []

    data = []
    for row in rows:
        cell_texts = [ele.text.strip() for ele in row.find_all('td')]
        data.append([text for text in cell_texts if text])  # Get rid of empty values

    counter = 0  # number of matching rows (displayed or not)
    for row in data:
        for cell in row:
            cellcontent_raw = str(cell).lower().strip()
            cellcontent_ascii = unicode_to_ascii(cellcontent_raw)

            if keyword_raw in cellcontent_raw or keyword_ascii in cellcontent_ascii:
                if counter < counter_max:
                    message_parts.append(_('<br />Row {}: ').format(counter + 1))
                    message_parts.extend('{} | '.format(datapoint) for datapoint in row)
                    message_parts.append('<br />')
                # Beyond the limit the row is only counted, to avoid an
                # over-long message. Either way stop at the first matching
                # cell so each row is counted (and shown) exactly once;
                # previously rows past the limit were counted once per
                # matching cell, inflating the total below.
                counter += 1
                break

    if counter > counter_max:
        message_parts.append(
            _('<br />{0} rows found. Only returning first {1}.').format(counter, counter_max))
        if counter_max == default_max:
            message_parts.append(
                _('<br />Hint: Use <b>/devilbot lookup <{0} {1}</b> to view {0} rows').format(counter_max*2, keyword))

    if counter == 0:
        message_parts.append(_('No match found'))

    yield from bot.coro_send_message(event.conv, ''.join(message_parts))