def mosesOutput_to_dict(moses_sentence, sentence):
    """Parse Moses output annotated with ``|begin-end|`` alignment markers.

    Args:
        moses_sentence: Moses output where each translated phrase is
            followed by a marker such as ``|0-1|``. Newlines are removed
            before parsing.
        sentence: The source sentence. It is accepted for interface
            compatibility; the current code does not use it.

    Returns:
        A ``(translation_to_idx, phrases)`` tuple. ``phrases`` holds the
        stripped text found before each marker (the text after the last
        marker is dropped).

        NOTE(review): nothing ever writes to ``translation_to_idx``, so it
        is always an empty dict. The code that populated it was disabled in
        the original; confirm the intended behavior before relying on it.
    """
    marker_pattern = r"\|.*?\|"
    flattened = moses_sentence.replace("\n", "")
    translated_sentence = [
        token.strip() for token in regex_split(marker_pattern, flattened)
    ]
    markers = re.findall(marker_pattern, flattened)

    translation_to_idx = {}
    for marker in markers:
        # A marker may carry several "begin-end" ranges separated by ", ".
        ranges = [
            chunk.replace("|", "").split('-') for chunk in marker.split(", ")
        ]
        for index_list in ranges:
            begin_idx, end_idx = map(int, index_list)
            indexes_of_translation = tuple(range(begin_idx, end_idx + 1))
            exists_translation = [
                get_dict_tupleKeys(idx, translation_to_idx)
                for idx in indexes_of_translation
            ]
            # Debug trace kept so the observable output stays unchanged.
            print(indexes_of_translation, exists_translation)

    return translation_to_idx, translated_sentence[:-1]
def validate_serie(self):
    """Validate the serial range that starts at ``self.since``.

    Nothing is checked unless the product is tracked by serial number, a
    pricelist item exists for it and ``self.since`` is set. When those hold,
    the prefix and length of ``self.since`` are checked, then every serial
    of the range (``self.product_qty`` consecutive numbers) is passed to
    ``self.validate_exist_serie``.

    Raises:
        ValidationError: if the first six characters of ``self.since``
            differ from those of the pricelist item's ``prefix_request``,
            or if ``self.since`` is not exactly 12 characters long.
    """
    pricelist_item_obj = self.env['product.pricelist.item']
    if not self.product_id or self.product_id.tracking != 'serial':
        return
    item_id = pricelist_item_obj.search(
        [('product_id', '=', self.product_id.id)],
        order="create_date desc",
        limit=1)
    if not (item_id and self.since):
        return

    expected_prefix = item_id.prefix_request
    if self.since[0:6] != expected_prefix[0:6]:
        raise ValidationError(
            ("Favor de verificar el prefijo de las solicitudes"))
    elif len(self.since) != 12:
        raise ValidationError((
            "El número de serie contiene {} digitos y debería tener 12 digitos, favor de verificarlo"
            .format(len(self.since))))

    # Validate every serial number of the range.
    # The series is based on the last run of digits in the first serial.
    caught_initial_number = regex_findall(r"\d+", self.since)
    initial_number = caught_initial_number[-1]
    padding = len(initial_number)
    # Split the serial number to get the text around that number.
    # initial_number could appear several times in the SN,
    # e.g. BAV023B00001S00001, so earlier pieces are glued back together.
    splitted = regex_split(initial_number, self.since)
    serial_prefix = initial_number.join(splitted[:-1])
    serial_suffix = splitted[-1]
    first_value = int(initial_number)
    for offset in range(0, int(self.product_qty)):
        self.validate_exist_serie(
            self.product_id,
            '%s%s%s' % (serial_prefix,
                        str(first_value + offset).zfill(padding),
                        serial_suffix))
def split_sentences(text):
    """Split *text* into sentences (Latin and Cyrillic capitals supported).

    A boundary is ``.``, ``!`` or ``?`` (optionally followed by ``"``) that
    is not preceded by a capital letter and is followed by whitespace and a
    capital letter (optionally behind an opening ``"``).

    Args:
        text: The text to split.

    Returns:
        A list of sentences. Every sentence except the last has its leading
        whitespace removed; the trailing fragment is appended exactly as
        found, so it may start with whitespace.
    """
    # "\\s" keeps the regex identical while avoiding an invalid string escape.
    boundary = u'(?<![A-ZА-ЯЁ])([.!?]"?)(?=\\s+"?[A-ZА-ЯЁ])'
    pieces = regex_split(boundary, text, flags=REGEX_UNICODE)
    # With one capture group the result alternates body, mark, ..., tail.
    tail = pieces.pop()
    sentences = [
        (str(body) + str(mark)).lstrip()
        for body, mark in zip(pieces[0::2], pieces[1::2])
    ]
    sentences.append(tail)
    return sentences
def split_sentences(text):
    """Split *text* into a list of sentences.

    A sentence ends at ``.``, ``!`` or ``?`` (optionally followed by ``"``)
    when that mark is not preceded by an ASCII capital and is followed by
    whitespace and an ASCII capital (optionally behind ``"``).

    Args:
        text: The text to split.

    Returns:
        The sentences as text strings. All but the last are left-stripped;
        the trailing fragment is returned exactly as found.
    """
    # ``unicode`` only exists on Python 2; type(u'') is the text type on
    # both major versions.
    text_type = type(u'')
    parts = regex_split(u'(?<![A-Z])([.!?]"?)(?=\\s+"?[A-Z])', text,
                        flags=REGEX_UNICODE)
    result = []
    # parts alternates body, mark, body, mark, ..., tail.
    for body, mark in zip(parts[:-1:2], parts[1::2]):
        result.append((text_type(body) + text_type(mark)).lstrip())
    result.append(parts[-1])
    return result
def split_sentences(text):
    """Break *text* into sentences.

    The text is cut after ``.``, ``!`` or ``?`` (plus an optional closing
    ``"``) whenever the mark does not follow an ASCII capital and the next
    sentence starts, after whitespace, with an ASCII capital.

    Args:
        text: The original text.

    Returns:
        A list of text strings. Leading whitespace is removed from every
        sentence but the final fragment, which is appended untouched.
    """
    to_text = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    chunks = regex_split(u'(?<![A-Z])([.!?]"?)(?=\\s+"?[A-Z])', text,
                         flags=REGEX_UNICODE)
    sentences = []
    # Walk the (body, punctuation) pairs; the last chunk has no punctuation.
    for i in range(0, len(chunks) - 1, 2):
        joined = u''.join((to_text(chunks[i]), to_text(chunks[i + 1])))
        sentences.append(joined.lstrip())
    sentences.append(chunks[-1])
    return sentences
def split_sentences(text):
    """Split *text* into sentences, preferring NLTK's punkt tokenizer.

    If the punkt tokenizer can be loaded and run, its sentences are returned
    after ``str.capitalize()``. If anything goes wrong on that path, a regex
    splitter is used instead; that path does not change letter case and
    leaves the trailing fragment unstripped.

    Args:
        text: The text to split.

    Returns:
        A list of sentence strings.
    """
    try:
        tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")
        return [sent.capitalize() for sent in tokenizer.tokenize(text)]
    except Exception:
        # Deliberate best-effort: any NLTK problem falls back to the regex
        # splitter below. KeyboardInterrupt/SystemExit are no longer caught.
        pass

    to_text = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    pieces = regex_split(r'(?<![A-Z])([.!?]"?)(?=\s+"?[A-Z])', text)
    sentences = [
        (to_text(body) + to_text(mark)).lstrip()
        for body, mark in zip(pieces[:-1:2], pieces[1::2])
    ]
    sentences.append(pieces[-1])
    return sentences
def split_sentences(self, text):
    """Split *text* into sentences.

    The regular expression matches sentence-ending punctuation and splits
    the string there, giving a list such as ``["Hello, world", "!", ...]``
    where punctuation (with an optional closing quote) is separate from the
    text. Each (text, punctuation) pair is then joined and left-stripped.
    The last list item has no punctuation partner and is appended as is.

    Args:
        text: The text to split.

    Returns:
        A list of sentence strings.
    """
    to_text = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    parts = regex_split(u'(?<![A-Z])([.!?]"?)(?=\\s+"?[A-Z])', text,
                        flags=REGEX_UNICODE)
    last = parts[-1]
    sentences = []
    stream = iter(parts[:-1])
    for body in stream:
        # The split result alternates text and punctuation, so the next
        # item always exists here.
        mark = next(stream)
        sentences.append((to_text(body) + to_text(mark)).lstrip())
    sentences.append(last)
    return sentences
def get_keywords_meta(elem):
    """Return the keywords listed in the page's ``<meta name="keywords">`` tag.

    The tag's ``content`` attribute is split on ``;``, ``,`` and spaces.
    Each piece is stripped of surrounding punctuation and lower-cased, and
    pieces that end up empty are dropped. Duplicates are NOT removed.

    Args:
        elem: An object with a ``find(name, attrs=...)`` method that returns
            the meta tag (or a falsy value when there is none). The tag
            must support ``.get("content")``.

    Returns:
        A list of keyword strings; empty when the tag or its ``content``
        attribute is missing.
    """
    web_keywords = elem.find('meta', attrs={'name': 'keywords'})
    if not web_keywords:
        return []
    keywords_content = web_keywords.get("content")
    if not keywords_content:
        return []
    cleaned = (
        str(piece).strip(punctuation).lower()
        for piece in regex_split(";|,| ", keywords_content)
    )
    # Consecutive separators ("a, b") produce empty pieces; skip them.
    return [keyword for keyword in cleaned if keyword]
def split_sentences(text):
    """Split *text* into sentences.

    The regular expression matches sentence-ending punctuation and splits
    the string there, so the intermediate list looks like
    ``["Hello, world", "!", ...]``. Consecutive (text, punctuation) items
    are joined and stripped of leading whitespace. The final item has no
    punctuation partner and is appended unchanged.

    Args:
        text: The text to split.

    Returns:
        A list of sentence strings.
    """
    sentences = regex_split(r'(?<![A-Z])([.!?]"?)(?=\s+"?[A-Z])', text)
    trailing = sentences[-1]
    bodies = sentences[0:-1:2]
    marks = sentences[1::2]
    joined = [(str(b) + str(m)).lstrip() for b, m in zip(bodies, marks)]
    return joined + [trailing]
def __new__(cls, value):
    """Build a sorted ``(low, high)`` float interval.

    Args:
        value: An ``int``, ``float`` or ``str`` such as ``"1,5-3"`` (commas
            are read as decimal points and every character other than
            digits and ``.`` separates numbers), or a sized, indexable
            sequence of one or two numbers. A single number ``x`` yields
            ``(x, x)``.

    Returns:
        A new ``cls`` instance holding the two bounds in ascending order.

    Raises:
        ValueError: if the input does not describe exactly one or two
            numbers.
    """
    rawval = str(value)
    # Exact type match is kept on purpose: subclasses (e.g. bool) keep
    # taking the sequence path, as before.
    if type(value) in (int, float, str):
        value = str(value).replace(',', '.')
        value = regex_split('[^0-9.]', value)
        value = [str(e) for e in value if e]
    if len(value) == 1:
        value = [value[0], value[0]]
    try:
        # Explicit check instead of ``assert``, which ``python -O`` removes.
        if len(value) != 2:
            raise ValueError(rawval)
        value = tuple(sorted([float(e) for e in value]))
    except Exception:
        raise ValueError('«%s» must be an int/float interval'
                         ' representation' % rawval)
    return tuple.__new__(cls, value)
def __new__(cls, value):
    """Create an interval as a sorted pair of floats.

    Args:
        value: Either a scalar (``int``, ``float``, ``str``) or a sized,
            indexable sequence. Scalars are converted to text, commas are
            turned into decimal points and the text is split on every
            character that is not a digit or ``.``. One number ``x`` is
            expanded to ``[x, x]``.

    Returns:
        A ``cls`` instance containing ``(low, high)``.

    Raises:
        ValueError: when the number count is not one or two, or a bound
            cannot be converted to ``float``.
    """
    rawval = str(value)
    error = ValueError('«%s» must be an int/float interval'
                       ' representation' % rawval)
    # Exact type match kept deliberately (subclasses are not parsed as text).
    if type(value) in (int, float, str):
        text = str(value).replace(',', '.')
        value = [str(e) for e in regex_split('[^0-9.]', text) if e]
    if len(value) == 1:
        value = [value[0], value[0]]
    # The length is validated explicitly; the original ``assert`` vanished
    # under ``python -O`` and let wrongly sized intervals through.
    try:
        size_ok = len(value) == 2
        bounds = tuple(sorted([float(e) for e in value])) if size_ok else None
    except Exception:
        raise error
    if bounds is None:
        raise error
    return tuple.__new__(cls, bounds)
def _generate_serial_numbers(self, next_serial_count=False):
    """Generate ``lot_name`` values from ``self.next_serial``.

    One move line command is created for each generated name and the
    commands are written to ``move_line_ids``.

    For products tracked by lot, the number of names is
    ``int(product_uom_qty) / next_serial_qty`` (truncated). That value is
    stored in ``self.next_serial_count`` and every generated line that has a
    ``qty_done`` key gets ``next_serial_qty`` as its quantity.

    Args:
        next_serial_count: How many names to generate. A falsy value means
            ``self.next_serial_count``.

    Returns:
        ``True``.

    Raises:
        ValidationError: if ``self.next_serial`` contains no digit.
    """
    self.ensure_one()
    if not next_serial_count:
        next_serial_count = self.next_serial_count
    # We look if the serial number contains at least one digit.
    caught_initial_number = regex_findall(r"\d+", self.next_serial)
    if not caught_initial_number:
        raise ValidationError(
            _('The serial number must contain at least one digit.'))
    # We base the series on the last number found in the base serial number.
    initial_number = caught_initial_number[-1]
    padding = len(initial_number)
    # We split the serial number to get the prefix and suffix.
    # initial_number could appear several times in the SN,
    # e.g. BAV023B00001S00001, hence the join over all leading pieces.
    splitted = regex_split(initial_number, self.next_serial)
    prefix = initial_number.join(splitted[:-1])
    suffix = splitted[-1]
    first_value = int(initial_number)

    tracked_by_lot = bool(self.product_id
                          and self.product_id.tracking == 'lot')
    serial_range = next_serial_count
    if tracked_by_lot:
        serial_range = int(
            int(self.product_uom_qty) / self.next_serial_qty)
        self.next_serial_count = serial_range

    lot_names = [
        '%s%s%s' % (prefix, str(first_value + i).zfill(padding), suffix)
        for i in range(0, serial_range)
    ]
    move_lines_commands = self._generate_serial_move_line_commands(
        lot_names)
    if tracked_by_lot:
        for line in move_lines_commands:
            line_vals = line[2]
            if 'qty_done' in line_vals:
                line_vals.update({'qty_done': self.next_serial_qty})
    self.write({'move_line_ids': move_lines_commands})
    return True
def extract_name(name_string):
    """Extract the ROI name and split it into main name and part name.

    Only the last whitespace-separated token is used. ``name_region`` is
    split at the first two underscore-separated parts. A name without
    underscores is split at its first run of digits: ``Lung2`` gives
    ``("Lung", "2")``.

    :param name_string: The full string of the line with the name
    :type name_string: str | list of [str]
    :return: the first name of the region, and the sub name of the region;
        the sub name is ``''`` when the name has neither an underscore nor
        a digit
    """
    if isinstance(name_string, str):
        name_string = name_string.split()
    parts = name_string[-1].split('_')
    if len(parts) > 1:
        # i.e. the name is in the form name_region
        return parts[0], parts[1]
    # The capturing group keeps the digits in the split result:
    # [text before, digits, rest...].
    pieces = regex_split(r'(\d+)', parts[0])
    roi_name = pieces[0]
    # Without digits the split returns one item; previously an IndexError.
    sub_name = pieces[1] if len(pieces) > 1 else ''
    return roi_name, sub_name
def split_sentences(text):
    """Split *text* into sentences (Latin and Cyrillic capitals supported).

    The regex matches sentence-ending punctuation and splits the string
    there, e.g. ``["Hello, world", "!", ...]``, so punctuation is separate
    from the text. Each (text, punctuation) pair is then joined into one
    item and stripped of leading whitespace. The last item of the split has
    no punctuation partner and is appended unchanged.

    Args:
        text: The text to split.

    Returns:
        A list of sentence strings.
    """
    # ``unicode`` does not exist on Python 3; type(u'') is the text type on
    # both major versions.
    as_text = type(u'')
    fragments = regex_split(u'(?<![A-ZА-ЯЁ])([.!?]"?)(?=\\s+"?[A-ZА-ЯЁ])',
                            text, flags=REGEX_UNICODE)
    remainder = fragments[-1]
    paired = zip(fragments[:-1:2], fragments[1::2])
    sentences = [u''.join(map(as_text, pair)).lstrip() for pair in paired]
    sentences.append(remainder)
    return sentences
def onchange_serie(self):
    """Recompute quantity and product from the serial range, then validate it.

    For every record:

    * ``product_uom_qty`` is reset to 0 and, when both ``series_start`` and
      ``series_end`` are set, becomes ``end - start + 1`` (only if the end
      is not below the start). Numbers are read after the first ``-`` when
      both values contain one, otherwise from the whole value.
    * When ``series_start`` is set, the product is looked up through the
      pricelist item whose ``prefix_request`` equals its first six
      characters, and every serial of the range is passed to
      ``self.validate_location_serie``.

    Raises:
        ValidationError: if no pricelist item matches the serial prefix.
    """
    pricelist_item_obj = self.env['product.pricelist.item']
    for record in self:
        record.product_uom_qty = 0
        start = record.series_start
        end = record.series_end
        if start and end:
            if '-' in start and '-' in end:
                start_num = start.split('-')[1]
                end_num = end.split('-')[1]
                if end_num.isnumeric() and start_num.isnumeric():
                    if int(end_num) >= int(start_num):
                        record.product_uom_qty = (
                            int(end_num) - int(start_num) + 1)
            elif start.isnumeric() and end.isnumeric():
                # Compare against the start value itself. The original
                # compared against int(start.isnumeric()), i.e. 1.
                if int(end) >= int(start):
                    record.product_uom_qty = int(end) - int(start) + 1

        if not start:
            continue

        # Work out which product the series refers to.
        product_prefix = start[0:6]
        pricelist_id = pricelist_item_obj.search(
            [('prefix_request', '=', product_prefix),
             ('company_id', '=', self.company_id.id)],
            order="create_date desc", limit=1)
        if not pricelist_id:
            raise ValidationError((
                'No se pudo validar a que prefijo hace referencia "{}" favor de verificarlo con sistemas'.format(product_prefix)))
        record.product_id = pricelist_id.product_id.id or False

        # Validate every serial number of the range.
        caught_initial_number = regex_findall(r"\d+", start)
        if not caught_initial_number:
            # No digits means the quantity above stayed at 0, so there is
            # nothing to validate (this used to raise IndexError).
            continue
        initial_number = caught_initial_number[-1]
        padding = len(initial_number)
        # We split the serial number to get the prefix and suffix.
        # initial_number could appear several times in the SN,
        # e.g. BAV023B00001S00001
        splitted = regex_split(initial_number, start)
        prefix = initial_number.join(splitted[:-1])
        suffix = splitted[-1]
        first_value = int(initial_number)
        for i in range(0, int(record.product_uom_qty)):
            self.validate_location_serie(
                record.picking_id.location_id,
                '%s%s%s' % (prefix, str(first_value + i).zfill(padding),
                            suffix))
def generate_lot_names(self, first_lot, count):
    """Build ``count`` consecutive lot names starting at ``first_lot``.

    The last run of digits in ``first_lot`` is the counter; it is
    incremented and zero-padded to its original width. A name without any
    digit is retried with ``"0"`` appended.
    """
    digit_runs = regex_findall(r"\d+", first_lot)
    if not digit_runs:
        # No counter to increment yet: start one at 0.
        return self.generate_lot_names(first_lot + "0", count)

    counter_text = digit_runs[-1]
    width = len(counter_text)
    # The counter text may also occur earlier in the name
    # (e.g. BAV023B00001S00001), so only the final piece is the suffix and
    # the earlier pieces are glued back together with the counter text.
    pieces = regex_split(counter_text, first_lot)
    suffix = pieces.pop()
    prefix = counter_text.join(pieces)
    start = int(counter_text)
    return [
        '%s%s%s' % (prefix, str(start + offset).zfill(width), suffix)
        for offset in range(count)
    ]
def read_article(file_name):
    """Extract the text of a PDF file and split it into sentences.

    Every page returned by ``PDFPage.get_pages`` is rendered to text through
    a ``TextConverter``. The accumulated text is then split at ``.``, ``!``
    or ``?`` marks that are followed by whitespace and a capital letter
    (Latin or Cyrillic).

    Args:
        file_name: Path of the PDF file to read.

    Returns:
        A list of sentence strings. All but the last are left-stripped; the
        trailing fragment is appended exactly as found.
    """
    resource_manager = PDFResourceManager()
    fake_file_handle = io.StringIO()
    converter = TextConverter(resource_manager, fake_file_handle)
    try:
        page_interpreter = PDFPageInterpreter(resource_manager, converter)
        with open(file_name, 'rb') as fh:
            for page in PDFPage.get_pages(fh, caching=True,
                                          check_extractable=True):
                page_interpreter.process_page(page)
        article_text = fake_file_handle.getvalue()
    finally:
        # Close the handles even when opening or processing the PDF fails.
        converter.close()
        fake_file_handle.close()

    pieces = regex_split(u'(?<![A-ZА-ЯЁ])([.!?]"?)(?=\\s+"?[A-ZА-ЯЁ])',
                         article_text, flags=REGEX_UNICODE)
    # pieces alternates text, punctuation, ..., trailing text.
    sentences = [
        (str(body) + str(mark)).lstrip()
        for body, mark in zip(pieces[:-1:2], pieces[1::2])
    ]
    sentences.append(pieces[-1])
    return sentences
def get_nombre_archivo_acta(self, obj):
    """Return the cleaned file name of ``obj.archivo_acta_constitutiva``.

    The base name of the stored file is split on ``FILENAME_REGEX`` and the
    pieces are joined back together, so whatever the pattern matches is
    dropped (unless the pattern captures it).
    """
    stored_name = obj.archivo_acta_constitutiva.name
    pieces = regex_split(FILENAME_REGEX, path.basename(stored_name))
    return u''.join(pieces)
def word_count(phrase):
    """Count the words of *phrase*, case-insensitively.

    The lower-cased phrase is split on every character outside the class
    ``[a-zA-Z|'0-9]``. Empty pieces are ignored and each remaining piece is
    passed through ``unquote`` before counting.

    NOTE(review): ``|`` inside a character class is a literal pipe, so
    pipes count as word characters here. Confirm whether that is intended.

    Returns:
        A ``Counter`` mapping each word to its number of occurrences.
    """
    tokens = regex_split(r'[^a-zA-Z|\'|0-9]', phrase.lower())
    non_empty = [token for token in tokens if len(token) > 0]
    return Counter(unquote(token) for token in non_empty)
def splitPunctuation(string):
    """Split a string, or every item of a collection, on punctuation.

    A ``str`` is split on any character of
    ``SpellCheckHelper.regex_punctuation``. Anything else is iterated and
    each item is handed to ``SpellCheckHelper.splitPunctuation``, giving a
    list with one result per item.
    """
    if not isinstance(string, str):
        return [
            SpellCheckHelper.splitPunctuation(element) for element in string
        ]
    char_class = "[" + SpellCheckHelper.regex_punctuation + "]"
    return regex_split(char_class, string)
def basename(fobj):
    """Return the cleaned base name of ``fobj.name``.

    The base name is split on ``FILENAME_REGEX`` and the pieces are joined
    back together without a separator.
    """
    file_name = path.basename(fobj.name)
    pieces = regex_split(FILENAME_REGEX, file_name)
    return u''.join(pieces)
def abbreviate(words):
    """Return the acronym of *words*.

    Words are separated by whitespace runs or single hyphens; the acronym
    is the upper-cased first character of each word.
    """
    initials = []
    for word in regex_split(r'\s+|-', words):
        # Slicing (not indexing) copes with the empty pieces produced by
        # leading, trailing or doubled separators.
        initials.append(word[:1].upper())
    return ''.join(initials)
def create(self, vals):
    """Create the stock move and the move line(s) its transfer type needs.

    Before creation, for ``as-ov`` transfers of serial-tracked products,
    ``vals['papeleria']`` is filled from the newest matching pricelist item.
    After creation, move lines are created depending on the picking's
    ``type_transfer``:

    * ``ov-as`` / ``as-ov`` / ``cont-ov``: one line for the lot named
      ``res.series``; the lot gets the picking's employee and warehouse.
      ``as-ov`` also validates the initial investment and received amount.
    * ``ac-ov`` / ``ov-ac``: one line per serial of the range starting at
      ``res.series_start`` (``res.product_uom_qty`` serials).
    * ``servicios`` / ``reparaciones``: one line for the lot named
      ``res.service_item_number``.
    * ``consumo``: one line with the full move quantity and no lot.

    Args:
        vals: Field values for the new move.

    Returns:
        The created move.

    Raises:
        ValidationError: for ``as-ov`` transfers whose origin is not
            ``cancelada``/``extravio``/``sobrantes`` when the initial
            investment is not positive or the received amount is below
            ``papeleria``.
    """
    picking_obj = self.env['stock.picking']
    move_line_obj = self.env['stock.move.line']
    lot_obj = self.env['stock.production.lot']
    pricelist_item_obj = self.env['product.pricelist.item']
    product_obj = self.env['product.product']
    picking_id = picking_obj.browse(vals.get('picking_id'))

    if picking_id.type_transfer == 'as-ov':
        if vals.get('product_id'):
            product_id = product_obj.browse(vals.get('product_id'))
            if product_id.tracking == 'serial':
                item_id = pricelist_item_obj.search(
                    [('product_id', '=', product_id.id),
                     ('company_id', '=', vals.get('company_id'))],
                    order="create_date desc", limit=1)
                if item_id:
                    vals['papeleria'] = item_id.stationery

    res = super(StockMove, self).create(vals)

    def _move_line_vals(qty_done, **extra):
        # Values shared by every move line created below.
        line_vals = {
            'picking_id': picking_id.id,
            'move_id': res.id,
            'product_id': res.product_id.id,
            'product_uom_id': res.product_id.uom_id.id,
            'qty_done': qty_done,
            'location_id': picking_id.location_id.id,
            'location_dest_id': picking_id.location_dest_id.id,
            'state': 'assigned',
            'reference': res.reference,
        }
        line_vals.update(extra)
        return line_vals

    if vals.get('picking_id'):
        if picking_id.type_transfer in ('ov-as', 'as-ov', 'cont-ov'):
            lot_id = lot_obj.search(
                [('name', '=', res.series),
                 ('company_id', '=', vals.get('company_id'))], limit=1)
            move_line_obj.create(
                _move_line_vals(1, lot_id=lot_id.id or False))
            lot_id.employee_id = picking_id.employee_id.id or False

            # as-ov: the down payment must be greater than zero.
            if picking_id.type_transfer == 'as-ov':
                if vals.get('origen_solicitud'):
                    origen = vals.get('origen_solicitud')
                else:
                    origen = self.origen_solicitud
                if origen not in ('cancelada', 'extravio', 'sobrantes'):
                    if vals.get('inversion_inicial'):
                        inversion_inicial = vals.get('inversion_inicial')
                    else:
                        inversion_inicial = self.inversion_inicial
                    if inversion_inicial <= 0:
                        raise ValidationError("La inversión inicial de una solicitud está en cero")
                    if self.amount_received < self.papeleria:
                        raise ValidationError("El importe recibido de una solicitud es menor a la papeleria")

            # Sales office -> assistant uses the source location; otherwise
            # the move comes from the assistant back to the sales office.
            if picking_id.type_transfer == 'ov-as':
                location_id = picking_id.location_id
            else:
                location_id = picking_id.location_dest_id
            if location_id:
                # Find the warehouse the transfer belongs to and, if found,
                # place the lot in that warehouse.
                warehouse_id = picking_id.location_id.get_warehouse()
                if warehouse_id:
                    lot_id.warehouse_id = warehouse_id.id
            res.state = 'assigned'

        if picking_id.type_transfer in ('ac-ov', 'ov-ac'):
            # Create one move line per serial number of the range.
            caught_initial_number = regex_findall(r"\d+", res.series_start)
            initial_number = caught_initial_number[-1]
            padding = len(initial_number)
            # We split the serial number to get the prefix and suffix.
            # initial_number could appear several times in the SN,
            # e.g. BAV023B00001S00001
            splitted = regex_split(initial_number, res.series_start)
            prefix = initial_number.join(splitted[:-1])
            suffix = splitted[-1]
            first_value = int(initial_number)
            for i in range(0, int(res.product_uom_qty)):
                serie = '{}{}{}'.format(
                    prefix, str(first_value + i).zfill(padding), suffix)
                # Keep the (possibly empty) recordset. The original replaced
                # it with False and then crashed on ``False.id`` when no
                # lot matched.
                lot_id = lot_obj.search(
                    [('name', '=', serie),
                     ('company_id', '=', vals.get('company_id'))], limit=1)
                move_line_obj.create(
                    _move_line_vals(1, lot_id=lot_id.id or False))
            res.state = 'assigned'

        if picking_id.type_transfer in ('servicios', 'reparaciones'):
            lot_id = lot_obj.search(
                [('product_id', '=', res.product_id.id),
                 ('name', '=', res.service_item_number),
                 ('company_id', '=', vals.get('company_id'))], limit=1)
            move_line_obj.create(
                _move_line_vals(1, lot_id=lot_id.id or False))
            res.state = 'assigned'

        # Consumption output: the whole quantity in a single line, no lot.
        if picking_id.type_transfer == 'consumo':
            move_line_obj.create(_move_line_vals(res.product_uom_qty))
            res.state = 'assigned'

    return res
def create_serial_num(self):
    """Create the serial numbers of the order, starting at ``first_serial``.

    The logic follows the stock move method ``_generate_serial_numbers``:
    the last run of digits of ``self.first_serial`` is incremented
    ``self.serial_count`` times, keeping its zero padding.

    Returns:
        ``True`` when no generated name exists yet for the product/company
        and the order has no more recorded serial lines than generated
        names; the missing lots are then created through
        ``create_absent_serial_num`` and written to the order.

        Otherwise an ``ir.actions.act_window`` dict opening the serial
        number wizard, whose context carries the existing, absent and extra
        lot names so the user can choose what to do next.
    """
    caught_initial_number = regex_findall(r"\d+", self.first_serial)
    initial_number = caught_initial_number[-1]
    padding = len(initial_number)
    # We split the serial number to get the prefix and suffix.
    # initial_number could appear several times in the SN,
    # e.g. BAV023B00001S00001
    splitted = regex_split(initial_number, self.first_serial)
    prefix = initial_number.join(splitted[:-1])
    suffix = splitted[-1]
    first_value = int(initial_number)
    lot_names = [
        '%s%s%s' % (prefix, str(first_value + i).zfill(padding), suffix)
        for i in range(0, self.serial_count)
    ]

    # Existing serial numbers in the range in stock.production.lot.
    existing_lots = self.env['stock.production.lot'].search([
        ('name', 'in', lot_names),
        ('product_id', '=', self.product_id.id),
        ('company_id', '=', self.company_id.id),
    ])
    # Serial numbers already recorded on lines of this order.
    created_lot_names = self.env['flsp.serialnumline'].search([
        ('order_id', '=', self.id),
    ]).mapped('serial_num')

    if len(existing_lots) == 0 and len(created_lot_names) <= len(lot_names):
        lots = self.create_absent_serial_num(lot_names)
        self._write_existing_serialnum_lines(lots)
        return True

    self._write_existing_serialnum_lines(existing_lots)
    # Computed once instead of once per generated name.
    existing_names = existing_lots.mapped('name')
    existing_lookup = set(existing_names)
    wanted_lookup = set(lot_names)
    absent_lot_names = [n for n in lot_names if n not in existing_lookup]
    extra_lot_names = [n for n in created_lot_names if n not in wanted_lookup]

    # Open the wizard to let the user choose what to do next.
    return {
        'name': 'FLSP Serial Number Wizard',
        'view_mode': 'form',
        'view_id': self.env.ref(
            'flspserialnum.flsp_serial_num_wizard_form_view').id,
        'res_model': 'flsp.serial.num.wizard',
        'type': 'ir.actions.act_window',
        'target': 'new',
        'context': {
            'default_order_id': self.id,
            'default_existing_lot_names': existing_names,
            'default_absent_lot_names': absent_lot_names,
            'default_extra_lot_names': extra_lot_names,
        },
    }
def get_nombre_archivo_rif(self, obj):
    """Return the cleaned file name of ``obj.archivo_rif``.

    The base name of the stored file is split on ``FILENAME_REGEX`` and the
    resulting pieces are concatenated.
    """
    rif_base_name = path.basename(obj.archivo_rif.name)
    fragments = regex_split(FILENAME_REGEX, rif_base_name)
    return u''.join(fragments)
# Nuts and Bolts
def sesame_resolve(name):
    """Resolve an object name to coordinates through the Sesame service.

    This handy function comes from KMI.

    The response is scanned line by line: a ``%J`` line supplies right
    ascension (field 1, divided by 15) and declination (field 2); every
    ``%I`` line contributes one identifier.

    Args:
        name: The object name to look up; it is URL-quoted before use.

    Returns:
        A ``(ra, dec, identifiers)`` tuple.

    Raises:
        SesameError: if the server cannot be reached or a response line
            cannot be parsed.
        NameNotFoundError: if the response has no usable ``%J`` line.
    """
    url = "http://vizier.u-strasbg.fr/viz-bin/nph-sesame/-oI/SNV?"
    quoted_name = quote(name)
    ra = None
    dec = None
    identifiers = []
    try:
        simbad_lines = urlopen(url + quoted_name).readlines()
    except Exception as e:
        raise SesameError("Unable to connect to Sesame server", e)

    for line in simbad_lines:
        if not isinstance(line, str):
            # On Python 3 the response lines are bytes; the prefix tests
            # below need text.
            line = line.decode("utf-8", "replace")
        line = line.strip()
        if line.startswith("%J "):
            fields = regex_split(r" +", line)
            try:
                ra = float(fields[1]) / 15.0  # raises ValueError, IndexError
                dec = float(fields[2])  # raises ValueError, IndexError
            except (ValueError, IndexError) as e:
                raise SesameError("Error parsing Sesame response", e)
        if line.startswith("%I "):
            fields = line.split(" ", 1)
            try:
                identifiers.append(fields[1])  # raises IndexError
            except IndexError as e:
                raise SesameError("Error parsing Sesame response", e)

    if ra is None or dec is None:
        raise NameNotFoundError("Name not found by Sesame server")
    return (ra, dec, identifiers)