def compile_regexps (self):
    """Compile our regular expressions for the rezkonv format.

    Runs the plain-text importer's ``compile_regexps`` first, then stores
    the rezkonv-specific matchers as attributes on ``self``.  Every pattern
    is a raw string so that regex escapes are never interpreted (or
    rejected) as string escapes by the Python parser.
    """
    testtimer = TimeAction('mealmaster_importer.compile_regexps',10)
    debug("start compile_regexps",5)
    plaintext_importer.TextImporter.compile_regexps(self)
    self.start_matcher = re.compile(rzc_start_pattern)
    self.end_matcher = re.compile(r"^[=M-][=M-][=M-][=M-][=M-]\s*$")
    self.group_matcher = re.compile(r"^\s*([=M-][=M-][=M-][=M-][=M-]+)-*\s*([^-]+)\s*-*",re.IGNORECASE)
    self.ing_cont_matcher = re.compile(r"^\s*[-;]")
    self.ing_opt_matcher = re.compile(r"(.+?)\s*\(?\s*optional\)?\s*$",re.IGNORECASE)
    # or or the German, oder
    self.ing_or_matcher = re.compile(r"^[-= ]*[Oo][dD]?[eE]?[Rr][-= ]*$",re.IGNORECASE)
    self.variation_matcher = re.compile(r"^\s*(VARIATION|HINT|NOTES?|VERÄNDERUNG|VARIANTEN|TIPANMERKUNGEN)(:.*)?",re.IGNORECASE)
    # a crude ingredient matcher -- we look for two numbers, intermingled with spaces
    # followed by a space or more, followed by a two digit unit (or spaces)
    self.ing_num_matcher = re.compile(
        r"^\s*%(top)s%(num)s+\s+[A-Za-z ][A-Za-z ]? .*"%{'top':convert.DIVIDEND_REGEXP,
                                                         'num':convert.NUMBER_REGEXP},
        re.IGNORECASE)
    self.amt_field_matcher = convert.NUMBER_MATCHER
    # we build a regexp to match anything that looks like
    # this: ^\s*ATTRIBUTE: Some entry of some kind...$
    attrmatch = r"^\s*("
    self.mmf = rzc
    for k in self.mmf.recattrs.keys():
        attrmatch += "%s|"%re.escape(k)
    # drop the trailing "|" before closing the alternation
    attrmatch = r"%s):\s*(.*)\s*$"%attrmatch[0:-1]
    self.attr_matcher = re.compile(attrmatch)
    testtimer.end()
def add_ref (self, id):
    """Turn the current ingredient into a reference to another recipe.

    ``id`` is the identifier the imported data uses for the referenced
    recipe.  The first time an identifier is seen, a fresh id is requested
    from ``self.rd.new_id()`` and remembered in ``self.id_converter`` so
    that later references to the same identifier map to the same id.
    """
    timeaction = TimeAction('importer.add_ref',10)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if id not in self.id_converter:
        self.id_converter[id]=self.rd.new_id()
    self.ing['refid']=self.id_converter[id]
    self.ing['unit']='recipe'
    timeaction.end()
def compile_regexps (self):
    """Compile the regular expressions used to parse MealMaster files.

    Runs the plain-text importer's ``compile_regexps`` first, then stores
    the MealMaster-specific matchers as attributes on ``self``.  Every
    pattern is a raw string so that regex escapes are never interpreted
    (or rejected) as string escapes by the Python parser.
    """
    testtimer = TimeAction('mealmaster_importer.compile_regexps',10)
    debug("start compile_regexps",5)
    plaintext_importer.TextImporter.compile_regexps(self)
    self.start_matcher = re.compile(mm_start_pattern)
    self.end_matcher = re.compile(r"^[M-][M-][M-][M-][M-]\s*$")
    self.group_matcher = re.compile(r"^\s*([M-][M-][M-][M-][M-])-*\s*([^-]+)\s*-*",re.IGNORECASE)
    self.ing_cont_matcher = re.compile(r"^\s*[-;]")
    self.ing_opt_matcher = re.compile(r"(.+?)\s*\(?\s*optional\)?\s*$",re.IGNORECASE)
    self.ing_or_matcher = re.compile(r"^[- ]*[Oo][Rr][- ]*$",re.IGNORECASE)
    self.variation_matcher = re.compile(r"^\s*(VARIATION|HINT|NOTES?)(:.*)?",re.IGNORECASE)
    # a crude ingredient matcher -- we look for two numbers,
    # intermingled with spaces followed by a space or more,
    # followed by a two digit unit (or spaces)
    self.ing_num_matcher = re.compile(
        r"^\s*%s+\s+[a-z ]{1,2}\s+.*\w+.*"%convert.NUMBER_REGEXP,
        re.IGNORECASE)
    self.amt_field_matcher = re.compile(r"^(\s*%s\s*)$"%convert.NUMBER_REGEXP)
    # we build a regexp to match anything that looks like
    # this: ^\s*ATTRIBUTE: Some entry of some kind...$
    self.mmf = mmf
    attrmatch = r"^\s*("
    for k in self.mmf.recattrs.keys():
        attrmatch += "%s|"%re.escape(k)
    # drop the trailing "|" before closing the alternation
    attrmatch = r"%s):\s*(.*)\s*$"%attrmatch[0:-1]
    self.attr_matcher = re.compile(attrmatch)
    testtimer.end()
def find_unit_field(self, fields, fields_is_numfield):
    """Return the first field when it is narrow enough to hold a unit.

    The first entry of ``fields`` qualifies when its width is positive, no
    wider than ``self.unit_length``, and at least one more field follows
    it.  Otherwise ``None`` is returned.  ``fields_is_numfield`` is
    accepted but not consulted.
    """
    testtimer = TimeAction('mealmaster_importer.find_unit_field', 10)
    candidate = fields[0]
    width = candidate[1] - candidate[0]
    unit_field = None
    if 0 < width <= self.unit_length and len(fields) > 1:
        unit_field = candidate
    testtimer.end()
    return unit_field
def __init__(
    self,
    rd=None,  # OBSOLETE
    total=0,
    prog=None,  # OBSOLETE
    do_markup=True,
    conv=None,
    rating_converter=None,
    name="importer",
):
    """Set up the importer state.

    rd and prog are obsolete: passing either only prints a stack trace and
    a warning.  Total is used to keep track of progress.

    do_markup should be True if instructions and modifications come to us
    unmarked up (i.e. if we need to escape < and &, etc. -- this might be
    False if importing e.g. XML).

    conv is the converter to use; when it is not supplied the shared one
    from ``convert.get_converter()`` is used.

    rating_converter, when supplied, means the caller handles rating
    conversion; otherwise a ``RatingConverter`` is created and
    ``do_conversion`` is set.
    """
    timeaction = TimeAction("importer.__init__", 10)
    # The original only assigned self.conv when no converter was passed,
    # leaving the attribute missing whenever a caller supplied one.
    self.conv = conv if conv else convert.get_converter()
    self.id_converter = {}  # a dictionary for tracking named IDs
    self.total = total
    if prog or rd:
        import traceback
        traceback.print_stack()
        # Single-argument print() yields the same text on Python 2 and 3.
        if prog:
            print("WARNING:  %s handed obsolete parameter prog= %s" % (self, prog))
        if rd:
            print("WARNING:  %s handed obsolete parameter rd= %s" % (self, rd))
    self.do_markup = do_markup
    self.count = 0
    self.rd = get_recipe_manager()
    # Remember the ingredient hooks and disable them on the recipe manager.
    self.rd_orig_ing_hooks = self.rd.add_ing_hooks
    self.added_recs = []
    self.added_ings = []
    self.rd.add_ing_hooks = []
    self.position = 0
    self.group = None
    # allow threaded calls to pause
    self.suspended = False
    # allow threaded calls to be terminated (this
    # has to be implemented in subclasses).
    self.terminated = False
    # Our rating converter -- if we've been handed a class, we
    # assume our caller will handle doing the
    # conversion. Otherwise we do it ourselves.
    if rating_converter:
        self.rating_converter = rating_converter
        self.do_conversion = False
    else:
        self.rating_converter = RatingConverter()
        self.do_conversion = True
    self.km = keymanager.get_keymanager()
    timeaction.end()
    SuspendableThread.__init__(self, name=name)
def add_unit (self, unit):
    """Record the unit of the current ingredient.

    The unit is stripped of surrounding whitespace and, when it appears in
    ``self.mmf.unit_conv``, replaced by the mapped value before being
    handed to ``importer.Importer.add_unit``.
    """
    testtimer = TimeAction('mealmaster_importer.add_unit',10)
    unit = unit.strip()
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if unit in self.mmf.unit_conv:
        unit = self.mmf.unit_conv[unit]
    importer.Importer.add_unit(self,unit)
    testtimer.end()
def add_unit(self, unit):
    """Record the unit of the current ingredient.

    The unit is stripped of surrounding whitespace and, when it appears in
    ``self.mmf.unit_conv``, replaced by the mapped value before being
    handed to ``importer.Importer.add_unit``.
    """
    testtimer = TimeAction('mealmaster_importer.add_unit', 10)
    unit = unit.strip()
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if unit in self.mmf.unit_conv:
        unit = self.mmf.unit_conv[unit]
    importer.Importer.add_unit(self, unit)
    testtimer.end()
def add_ref(self, id):
    """Turn the current ingredient into a reference to another recipe.

    ``id`` is the identifier the imported data uses for the referenced
    recipe.  The first time an identifier is seen, a fresh id is requested
    from ``self.rd.new_id()`` and remembered in ``self.id_converter`` so
    that later references to the same identifier map to the same id.
    """
    timeaction = TimeAction("importer.add_ref", 10)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if id not in self.id_converter:
        self.id_converter[id] = self.rd.new_id()
    self.ing["refid"] = self.id_converter[id]
    self.ing["unit"] = "recipe"
    timeaction.end()
def __init__(self,
             filename='Data/mealmaster.mmf',
             prog=None,
             source=None,
             threaded=True,
             two_col_minimum=38,
             conv=None):
    """Prepare a MealMaster import of ``filename``.

    filename is the file to parse.  prog is stored as ``self.prog`` and
    source as ``self.source``.  two_col_minimum is stored for later use
    when looking for a second ingredient column.  threaded and conv are
    accepted but are not forwarded to the base class here.
    """
    testtimer = TimeAction('mealmaster_importer.__init__', 10)
    debug("mmf_importer start __init__ ", 5)
    # where the data comes from
    self.fn = filename
    self.source = source
    self.prog = prog
    # column-layout settings
    self.unit_length = 2
    self.two_col_minimum = two_col_minimum
    # parser state
    self.header = False
    self.ing_added = False
    self.in_variation = False
    self.last_line_was = None
    self.instr = ""
    self.ingrs = []
    # prog, threaded and conv used to be passed along as well
    plaintext_importer.TextImporter.__init__(self, filename)
    testtimer.end()
def add_ing(self, ingdict):
    """Add ingredient to ingredients_table based on ingdict and return
    ingredient object.

    Ingdict contains:
      id: recipe_id
      unit: unit
      item: description
      key: keyed descriptor
      alternative: not yet implemented (alternative)
      optional: True|False (boolean)
      position: INTEGER [position in list]
      refid: id of reference recipe. If ref is provided, everything
             else is irrelevant except for amount.

    An 'amount' entry whose value is falsy is removed from ingdict before
    the row is appended.  The add_ing hooks, if any, are run on the new
    row.
    """
    self.changed = True
    debug('adding to ingredients_table %s' % ingdict, 3)
    timer = TimeAction('rdatabase.add_ing 2', 5)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if 'amount' in ingdict and not ingdict['amount']:
        del ingdict['amount']
    self.ingredients_table.append(ingdict)
    timer.end()
    debug('running ing hooks %s' % self.add_ing_hooks, 3)
    timer = TimeAction('rdatabase.add_ing 3', 5)
    if self.add_ing_hooks:
        self.run_hooks(self.add_ing_hooks, self.ingredients_table[-1])
    timer.end()
    debug('done with ing hooks', 3)
    return self.ingredients_table[-1]
def add_ing (self, ingdict):
    """Add ingredient to ingredients_table based on ingdict and return
    ingredient object.

    Ingdict contains:
      id: recipe_id
      unit: unit
      item: description
      key: keyed descriptor
      alternative: not yet implemented (alternative)
      optional: True|False (boolean)
      position: INTEGER [position in list]
      refid: id of reference recipe. If ref is provided, everything
             else is irrelevant except for amount.

    An 'amount' entry whose value is falsy is removed from ingdict before
    the row is appended.  The add_ing hooks, if any, are run on the new
    row.
    """
    self.changed=True
    debug('adding to ingredients_table %s'%ingdict,3)
    timer = TimeAction('rdatabase.add_ing 2',5)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if 'amount' in ingdict and not ingdict['amount']:
        del ingdict['amount']
    self.ingredients_table.append(ingdict)
    timer.end()
    debug('running ing hooks %s'%self.add_ing_hooks,3)
    timer = TimeAction('rdatabase.add_ing 3',5)
    if self.add_ing_hooks:
        self.run_hooks(self.add_ing_hooks,
                       self.ingredients_table[-1])
    timer.end()
    debug('done with ing hooks',3)
    return self.ingredients_table[-1]
def compile_regexps(self):
    """Compile the regular expressions used to parse MealMaster files.

    Runs the plain-text importer's ``compile_regexps`` first, then stores
    the MealMaster-specific matchers as attributes on ``self``.  Every
    pattern is a raw string so that regex escapes are never interpreted
    (or rejected) as string escapes by the Python parser.
    """
    testtimer = TimeAction('mealmaster_importer.compile_regexps', 10)
    debug("start compile_regexps", 5)
    plaintext_importer.TextImporter.compile_regexps(self)
    self.start_matcher = re.compile(mm_start_pattern)
    self.end_matcher = re.compile(r"^[M-][M-][M-][M-][M-]\s*$")
    self.group_matcher = re.compile(
        r"^\s*([M-][M-][M-][M-][M-])-*\s*([^-]+)\s*-*|^\s*---\s*([^-]+)\s*---\s*$",
        re.IGNORECASE)
    self.ing_cont_matcher = re.compile(r"^\s*[-;]")
    self.ing_opt_matcher = re.compile(r"(.+?)\s*\(?\s*optional\)?\s*$",
                                      re.IGNORECASE)
    self.ing_or_matcher = re.compile(r"^[- ]*[Oo][Rr][- ]*$", re.IGNORECASE)
    self.variation_matcher = re.compile(
        r"^\s*(VARIATION|HINT|NOTES?)(:.*)?", re.IGNORECASE)
    # a crude ingredient matcher -- we look for two numbers,
    # intermingled with spaces followed by a space or more,
    # followed by a two digit unit (or spaces)
    c = convert.get_converter()
    # Unit names are literal text; escape them so characters such as "."
    # in an abbreviation are not read as regex operators.  Empty keys are
    # skipped, as before.
    unit_alternatives = '|'.join(
        re.escape(u) for u in c.unit_dict.keys() if u)
    self.ing_num_matcher = re.compile(
        r"^\s*%s+\s+([a-z ]{1,2}|%s)\s+.*\w+.*" %
        (convert.NUMBER_REGEXP, '(' + unit_alternatives + ')'),
        re.IGNORECASE)
    self.amt_field_matcher = re.compile(r"^(\s*%s\s*)$" %
                                        convert.NUMBER_REGEXP)
    # we build a regexp to match anything that looks like
    # this: ^\s*ATTRIBUTE: Some entry of some kind...$
    self.mmf = mmf
    attrmatch = r"^\s*("
    for k in self.mmf.recattrs.keys():
        attrmatch += "%s|" % re.escape(k)
    # drop the trailing "|" before closing the alternation
    attrmatch = r"%s):\s*(.*)\s*$" % attrmatch[0:-1]
    self.attr_matcher = re.compile(attrmatch)
    testtimer.end()
def add_item (self, item):
    """Store the item text of the current ingredient and derive its key.

    The stripped text becomes ``self.ing['item']``.  Only the part before
    the first ";" is used to look up ``self.ing['ingkey']``.
    """
    testtimer = TimeAction('mealmaster_importer.add_item',10)
    cleaned = item.strip()
    self.ing['item'] = cleaned
    # fixing bug 1061363, potatoes; cut and mashed should become just potatoes
    # for keying purposes
    pieces = cleaned.split(";")
    self.ing['ingkey'] = self.km.get_key_fast(pieces[0])
    testtimer.end()
def add_amt (self, amount):
    """Store the amount of the current ingredient.

    We should NEVER get non-numeric amounts.  Amounts must contain
    [/.0-9 ] e.g. 1.2 or 1 1/5 or 1/3 etc.

    ``amount`` is passed to ``parse_range``; the two values it returns are
    stored as ``self.ing['amount']`` and ``self.ing['rangeamount']``.
    """
    # The docstring used to follow the timer assignment, where it was a
    # discarded string expression rather than documentation.
    timeaction = TimeAction('importer.add_amt',10)
    self.ing['amount'],self.ing['rangeamount']=parse_range(amount)
    timeaction.end()
def add_item(self, item):
    """Store the item text of the current ingredient and derive its key.

    The stripped text becomes ``self.ing['item']``.  Only the part before
    the first ";" is used to look up ``self.ing['ingkey']``.
    """
    testtimer = TimeAction('mealmaster_importer.add_item', 10)
    cleaned = item.strip()
    self.ing['item'] = cleaned
    # fixing bug 1061363, potatoes; cut and mashed should become just potatoes
    # for keying purposes
    pieces = cleaned.split(";")
    self.ing['ingkey'] = self.km.get_key_fast(pieces[0])
    testtimer.end()
def __init__(
        self,
        rd=None,  # OBSOLETE
        total=0,
        prog=None,  # OBSOLETE
        do_markup=True,
        conv=None,
        rating_converter=None,
        name='importer'):
    """Set up the importer state.

    rd and prog are obsolete: passing either only prints a stack trace and
    a warning.  Total is used to keep track of progress.

    do_markup should be True if instructions and modifications come to us
    unmarked up (i.e. if we need to escape < and &, etc. -- this might be
    False if importing e.g. XML).

    conv is the converter to use; when it is not supplied the shared one
    from ``convert.get_converter()`` is used.

    rating_converter, when supplied, means the caller handles rating
    conversion; otherwise a ``RatingConverter`` is created and
    ``do_conversion`` is set.
    """
    timeaction = TimeAction('importer.__init__', 10)
    # The original only assigned self.conv when no converter was passed,
    # leaving the attribute missing whenever a caller supplied one.
    self.conv = conv if conv else convert.get_converter()
    self.id_converter = {}  # a dictionary for tracking named IDs
    self.total = total
    if prog or rd:
        import traceback
        traceback.print_stack()
        if prog:
            print('WARNING: ', self, 'handed obsolete parameter prog=', prog)
        if rd:
            print('WARNING: ', self, 'handed obsolete parameter rd=', rd)
    self.do_markup = do_markup
    self.count = 0
    self.rd = get_recipe_manager()
    # Remember the ingredient hooks and disable them on the recipe manager.
    self.rd_orig_ing_hooks = self.rd.add_ing_hooks
    self.added_recs = []
    self.added_ings = []
    self.rd.add_ing_hooks = []
    self.position = 0
    self.group = None
    # allow threaded calls to pause
    self.suspended = False
    # allow threaded calls to be terminated (this
    # has to be implemented in subclasses).
    self.terminated = False
    # Our rating converter -- if we've been handed a class, we
    # assume our caller will handle doing the
    # conversion. Otherwise we do it ourselves.
    if rating_converter:
        self.rating_converter = rating_converter
        self.do_conversion = False
    else:
        self.rating_converter = RatingConverter()
        self.do_conversion = True
    self.km = keymanager.get_keymanager()
    timeaction.end()
    SuspendableThread.__init__(self, name=name)
def field_width(tuple):
    """Return the width of a (start, end) field.

    The width is ``end - start``.  When the end is falsy (for example
    ``None``), ``None`` is returned instead.
    """
    testtimer = TimeAction('mealmaster_importer.field_width', 10)
    debug("start field_width", 10)
    has_end = bool(tuple[1])
    testtimer.end()
    if not has_end:
        return None
    return tuple[1] - tuple[0]
def field_width (tuple):
    """Return the width of a (start, end) field.

    The width is ``end - start``.  When the end is falsy (for example
    ``None``), ``None`` is returned instead.
    """
    testtimer = TimeAction('mealmaster_importer.field_width',10)
    debug("start field_width",10)
    has_end = bool(tuple[1])
    testtimer.end()
    if not has_end:
        return None
    return tuple[1]-tuple[0]
def get_fields(string, tuples):
    """Slice ``string`` into one piece per entry of ``tuples``.

    Each truthy entry is a (start, end) pair and yields
    ``string[start:end]``; each falsy entry yields an empty string.
    """
    testtimer = TimeAction('mealmaster_importer.get_fields', 10)
    debug("start get_fields", 10)
    pieces = [string[span[0]:span[1]] if span else "" for span in tuples]
    testtimer.end()
    return pieces
def get_fields (string, tuples):
    """Slice ``string`` into one piece per entry of ``tuples``.

    Each truthy entry is a (start, end) pair and yields
    ``string[start:end]``; each falsy entry yields an empty string.
    """
    testtimer = TimeAction('mealmaster_importer.get_fields',10)
    debug("start get_fields",10)
    pieces = [string[span[0]:span[1]] if span else "" for span in tuples]
    testtimer.end()
    return pieces
def handle_group (self, groupm):
    """Start a new ingredient group.

    The second group of the match holds the group name, which is stored
    in title case (MealMaster defaults to all caps).  A name that contains
    no ASCII letters clears the current group instead.
    """
    testtimer = TimeAction('mealmaster_importer.handle_group',10)
    debug("start handle_group",10)
    title = groupm.groups()[1].title()
    self.group = title
    has_no_letters = re.match('^[^A-Za-z]*$',self.group)
    if has_no_letters:
        self.group = None
    testtimer.end()
def __init__(self):
    """Create empty hook lists, then connect and set up the tables."""
    # hooks run after adding, modifying or deleting a recipe.
    # Each hook is handed the recipe, except for delete_hooks,
    # which is handed the ID (since the recipe has been deleted)
    for hook_attr in ('add_hooks', 'modify_hooks', 'delete_hooks',
                      'add_ing_hooks'):
        setattr(self, hook_attr, [])
    timer = TimeAction('initialize_connection + setup_tables', 2)
    self.initialize_connection()
    self.setup_tables()
    timer.end()
def check_for_sleep (self):
    """Block while the importer is suspended.

    Raises ``Terminated`` if ``self.terminated`` is set on entry or at the
    start of any pass through the wait loop; otherwise sleeps one second
    per pass for as long as ``self.suspended`` stays set.
    """
    timeaction = TimeAction('importer.check_for_sleep',10)

    def bail_out_if_terminated():
        if self.terminated:
            raise Terminated("Importer Terminated!")

    bail_out_if_terminated()
    while self.suspended:
        bail_out_if_terminated()
        time.sleep(1)
    timeaction.end()
def start_ing (self, **kwargs):
    """Begin a new ingredient.

    The keyword arguments become the initial contents of ``self.ing``.
    """
    timeaction = TimeAction('importer.start_ing',10)
    # Recipe-id bookkeeping that used to happen here is disabled; the
    # keyword dictionary is used exactly as given.
    self.ing = kwargs
    timeaction.end()
def field_match(strings, tup, matcher):
    """Return True if every non-empty field slice matches ``matcher``.

    ``tup`` is a (start, end) pair; the slice ``s[start:end]`` is taken
    from each string in ``strings``.  Empty slices are ignored.
    ``matcher`` may be a compiled pattern or a pattern string, which is
    compiled here.
    """
    testtimer = TimeAction('mealmaster_importer.field_match', 10)
    debug("start field_match", 10)
    # isinstance() is the idiomatic type test and also accepts str subclasses.
    if isinstance(matcher, str):
        matcher = re.compile(matcher)
    start, end = tup[0], tup[1]
    all_match = True
    for f in [s[start:end] for s in strings]:
        if f and not matcher.match(f):
            all_match = False
            break
    testtimer.end()
    return all_match
def __init__(self):
    """Create empty hook lists, then connect and set up the tables."""
    # hooks run after adding, modifying or deleting a recipe.
    # Each hook is handed the recipe, except for delete_hooks,
    # which is handed the ID (since the recipe has been deleted)
    for hook_attr in ("add_hooks", "modify_hooks", "delete_hooks",
                      "add_ing_hooks"):
        setattr(self, hook_attr, [])
    timer = TimeAction("initialize_connection + setup_tables", 2)
    self.initialize_connection()
    self.setup_tables()
    timer.end()
def field_match (strings, tup, matcher):
    """Return True if every non-empty field slice matches ``matcher``.

    ``tup`` is a (start, end) pair; the slice ``s[start:end]`` is taken
    from each string in ``strings``.  Empty slices are ignored.
    ``matcher`` may be a compiled pattern or a pattern string, which is
    compiled here.
    """
    testtimer = TimeAction('mealmaster_importer.field_match',10)
    debug("start field_match",10)
    # isinstance() is the idiomatic type test and also accepts str subclasses.
    if isinstance(matcher, str):
        matcher=re.compile(matcher)
    start, end = tup[0], tup[1]
    all_match = True
    for f in [s[start:end] for s in strings]:
        if f and not matcher.match(f):
            all_match = False
            break
    testtimer.end()
    return all_match
def parse_inglist(self):
    """We handle our ingredients after the fact.

    Column positions are worked out from all collected ingredient lines
    (``self.find_ing_fields``); each line in ``self.ingrs`` is then cut
    into amount / unit / item for every column set and committed as an
    ingredient.
    """
    # The docstring used to follow the first two statements, where it was
    # a discarded string expression rather than documentation.
    testtimer = TimeAction('mealmaster_importer.parse_inglis', 10)
    debug("start parse_inglist", 5)
    ingfields = self.find_ing_fields()
    debug("ingredient fields are: %s" % ingfields, 10)
    for s, g in self.ingrs:
        for afield, ufield, ifield in ingfields:
            self.group = g
            amt, u, i = get_fields(s, (afield, ufield, ifield))
            debug("""amt:%(amt)s u:%(u)s i:%(i)s""" %
                  {'amt': amt, 'u': u, 'i': i}, 0)
            # sanity check...
            if not amt.strip() and not u.strip():
                if not i:
                    continue
                # if we have not amt or unit, let's do the right
                # thing if this just looks misaligned -- in other words
                # if the "item" column has 2 c. parsley, let's just parse
                # the damned thing as 2 c. parsley
                parsed = self.rd.parse_ingredient(i, conv=self.conv,
                                                  get_key=False)
                if parsed and parsed.get('amount', '') and parsed.get(
                        'item', ''):
                    amt = "%s" % parsed['amount']
                    u = parsed.get('unit', '')
                    i = parsed['item']
                    debug("""After sanity check amt:%(amt)s u:%(u)s i:%(i)s""" %
                          {'amt': amt, 'u': u, 'i': i}, 0)
            if amt.strip() or u.strip() or i.strip():
                self.start_ing()
                if amt:
                    self.add_amt(amt)
                if u:
                    self.add_unit(u)
                # a trailing "optional" marks the ingredient as optional
                # and is removed from the item text
                optm = self.ing_opt_matcher.match(i)
                if optm:
                    item = optm.groups()[0]
                    self.ing['optional'] = True
                else:
                    item = i
                self.add_item(item)
                debug("committing ing: %s" % self.ing, 6)
                self.commit_ing()
    testtimer.end()
def is_ingredient(self, l):
    """Return true if the line looks like an ingredient.

    We're going to go with a somewhat hackish approach here. Once we have
    the ingredient list, we can determine columns more appropriately.
    For now, a line counts as an ingredient when ``self.ing_num_matcher``
    matches it, or when it is at least 7 characters long and its first 5
    characters match ``self.blank_matcher`` (the specs suggest 7 blanks).

    Returns True or False.
    """
    testtimer = TimeAction('mealmaster_importer.is_ingredient', 10)
    looks_like_ingredient = bool(
        self.ing_num_matcher.match(l)
        or (len(l) >= 7 and self.blank_matcher.match(l[0:5])))
    # The timer is now ended on the "no" path too, and False is returned
    # explicitly rather than falling off the end with None.
    testtimer.end()
    return looks_like_ingredient
def __getitem__(self, k):
    """Return the value stored under ``k``.

    Values cached in ``self.just_got`` are returned directly.  Otherwise
    the key is pickled when ``self.pickle_key`` is set, the first row
    selected by that key is read, and its value column is unpickled.
    ``None`` is returned when the stored value is empty.
    """
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if k in self.just_got:
        return self.just_got[k]
    if self.pickle_key:
        k = pickle.dumps(k)
    t = TimeAction('dbdict getting from db', 5)
    v = getattr(self.vw.select(**{self.kp: k})[0], self.vp)
    t.end()
    if not v:
        return None
    try:
        # NOTE(review): pickle.loads executes arbitrary code if the stored
        # bytes are attacker-controlled; confirm the table only ever holds
        # data written by this application.
        return pickle.loads(v)
    except Exception:
        # Single-argument print() yields the same text on Python 2 and 3.
        print("Problem unpickling  %s" % (v,))
        raise
def __getitem__ (self, k):
    """Return the value stored under ``k``.

    Values cached in ``self.just_got`` are returned directly.  Otherwise
    the key is pickled when ``self.pickle_key`` is set, the first row
    selected by that key is read, and its value column is unpickled.
    ``None`` is returned when the stored value is empty.
    """
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if k in self.just_got:
        return self.just_got[k]
    if self.pickle_key:
        k=pickle.dumps(k)
    t=TimeAction('dbdict getting from db',5)
    v = getattr(self.vw.select(**{self.kp:k})[0],self.vp)
    t.end()
    if not v:
        return None
    try:
        # NOTE(review): pickle.loads executes arbitrary code if the stored
        # bytes are attacker-controlled; confirm the table only ever holds
        # data written by this application.
        return pickle.loads(v)
    except Exception:
        # Single-argument print() yields the same text on Python 2 and 3.
        print("Problem unpickling  %s" % (v,))
        raise
def is_ingredient (self, l):
    """Return true if the line looks like an ingredient.

    We're going to go with a somewhat hackish approach here. Once we have
    the ingredient list, we can determine columns more appropriately.
    For now, a line counts as an ingredient when ``self.ing_num_matcher``
    matches it, or when it is at least 7 characters long and its first 5
    characters match ``self.blank_matcher`` (the specs suggest 7 blanks).

    Returns True or False.
    """
    testtimer = TimeAction('mealmaster_importer.is_ingredient',10)
    looks_like_ingredient = bool(
        self.ing_num_matcher.match(l)
        or (len(l) >= 7 and self.blank_matcher.match(l[0:5])))
    # The timer is now ended on the "no" path too, and False is returned
    # explicitly rather than falling off the end with None.
    testtimer.end()
    return looks_like_ingredient
def new_rec(self):
    """Start a new recipe.

    A recipe that is still open is committed first; the per-recipe parser
    state is then reset.
    """
    testtimer = TimeAction('mealmaster_importer.new_rec', 10)
    debug("start new_rec", 5)
    # this shouldn't happen if recipes are ended properly
    # but we'll be graceful if a recipe starts before another
    # has ended...
    unfinished = self.rec
    if unfinished:
        self.commit_rec()
    self.committed = False
    self.start_rec()
    debug('resetting instructions', 5)
    self.instr, self.mod = "", ""
    self.ingrs = []
    self.header = False
    testtimer.end()
def handle_group(self, groupm):
    """Start a new ingredient group.

    The group name is taken from the second group of the match, or from
    the third when the second is empty.  It is stripped and put into a
    more sane title case (MealMaster defaults to all caps) before being
    stored in ``self.group``.  When neither group holds a name,
    ``self.group`` is left untouched.
    """
    testtimer = TimeAction('mealmaster_importer.handle_group', 10)
    debug("start handle_group", 10)
    captured = groupm.groups()
    name = captured[1] or captured[2]
    if name:
        self.group = name.strip().title()
    # The timer is ended on every path; the original returned early
    # without ending it when no name was found.
    testtimer.end()
def find_ing_fields(self):
    """Find fields in an ingredient line.

    Returns a list of ``[amount_field, unit_field, item_field]`` entries:
    one for the first column of ingredients and, when a second column is
    detected to the right of ``self.two_col_minimum``, one for that column
    as well.
    """
    testtimer = TimeAction('mealmaster_importer.find_ing_fields', 10)
    all_ings = [i[0] for i in self.ingrs]
    fields = find_fields(all_ings)
    fields_is_numfield = fields_match(all_ings, fields,
                                      self.amt_field_matcher)
    aindex, afield = self.find_amt_field(fields, fields_is_numfield)
    if aindex is not None:
        # everything up to and including the amount field is consumed
        fields = fields[aindex + 1:]
        fields_is_numfield = fields_is_numfield[aindex + 1:]
    ufield = fields and self.find_unit_field(fields, fields_is_numfield)
    if ufield:
        fields = fields[1:]
        fields_is_numfield = fields_is_numfield[1:]
    if fields:
        # the item runs from the next field to the end of the line
        ifield = [fields[0][0], None]
    else:
        ifield = 0, None
    retval = [[afield, ufield, ifield]]
    # A list is required here: the result is truth-tested, indexed and
    # sliced below, none of which a Python 3 filter() object supports.
    sec_col_fields = [x for x in fields if x[0] > self.two_col_minimum]
    if sec_col_fields:
        ibase = fields.index(sec_col_fields[0])
        # skip leading candidates that are not numeric
        while sec_col_fields and not fields_is_numfield[ibase]:
            ibase += 1
            sec_col_fields = sec_col_fields[1:]
        # if we might have a 2nd column...
        if sec_col_fields and len(sec_col_fields) > 2:
            fields_is_numfield = fields_is_numfield[ibase:]
            aindex2, afield2 = self.find_amt_field(sec_col_fields,
                                                   fields_is_numfield)
            if aindex2 is not None and len(
                    sec_col_fields[aindex2 + 1:]) >= 1:
                # then it's a go! Shift our first ifield
                retval[0][2] = [ifield[0], fields[ibase - 1][1]]
                sec_col_fields = sec_col_fields[aindex2 + 1:]
                fields_is_numfield = fields_is_numfield[aindex2 + 1:]
                ufield2 = self.find_unit_field(sec_col_fields,
                                               fields_is_numfield)
                if ufield2:
                    sec_col_fields = sec_col_fields[1:]
                    fields_is_numfield = fields_is_numfield[1:]
                ifield2 = sec_col_fields[0][0], None
                retval.append([afield2, ufield2, ifield2])
    testtimer.end()
    return retval
def convert_str_to_num (self, str):
    """Return a numerical servings value.

    The text is first tried as a plain float, then as a fraction via
    ``convert.frac_to_float``.  Failing both, the leading run of digits,
    slashes, dots and spaces is extracted and converted the same way.
    ``None`` is returned when no leading number is found.
    """
    timeaction = TimeAction('importer.convert_str_to_num',10)
    debug('converting servings for %s'%str,5)
    # try/finally: every return used to come before timeaction.end(), so
    # the timer was only ended when nothing could be converted.
    try:
        try:
            return float(str)
        except (TypeError, ValueError):
            conv = convert.frac_to_float(str)
            if conv:
                return conv
            m=re.match("([0-9/. ]+)",str)
            if m:
                num=m.groups()[0]
                try:
                    return float(num)
                except ValueError:
                    return convert.frac_to_float(num)
        return None
    finally:
        timeaction.end()
def new_rec (self):
    """Start a new recipe.

    A recipe that is still open is committed first; the per-recipe parser
    state is then reset.
    """
    testtimer = TimeAction('mealmaster_importer.new_rec',10)
    debug("start new_rec",5)
    # this shouldn't happen if recipes are ended properly
    # but we'll be graceful if a recipe starts before another
    # has ended...
    unfinished = self.rec
    if unfinished:
        self.commit_rec()
    self.committed=False
    self.start_rec()
    debug('resetting instructions',5)
    self.instr, self.mod = "", ""
    self.ingrs=[]
    self.header=False
    testtimer.end()
def parse_inglist(self):
    """We handle our ingredients after the fact.

    Column positions are worked out from all collected ingredient lines
    (``self.find_ing_fields``); each line in ``self.ingrs`` is then cut
    into amount / unit / item for every column set and committed as an
    ingredient.
    """
    # The docstring used to follow the first two statements, where it was
    # a discarded string expression rather than documentation.
    testtimer = TimeAction('mealmaster_importer.parse_inglis',10)
    debug("start parse_inglist",5)
    ingfields =self.find_ing_fields()
    debug("ingredient fields are: %s"%ingfields,10)
    for s,g in self.ingrs:
        for afield,ufield,ifield in ingfields:
            self.group = g
            amt,u,i = get_fields(s,(afield,ufield,ifield))
            debug("""amt:%(amt)s u:%(u)s i:%(i)s"""%{'amt':amt,'u':u,'i':i},0)
            # sanity check...
            if not amt.strip() and not u.strip():
                if not i:
                    continue
                # if we have not amt or unit, let's do the right
                # thing if this just looks misaligned -- in other words
                # if the "item" column has 2 c. parsley, let's just parse
                # the damned thing as 2 c. parsley
                parsed = self.rd.parse_ingredient(i,conv=self.conv,get_key=False)
                if parsed and parsed.get('amount','') and parsed.get('item',''):
                    amt = "%s"%parsed['amount']
                    u = parsed.get('unit','')
                    i = parsed['item']
                    debug("""After sanity check amt:%(amt)s u:%(u)s i:%(i)s"""%{'amt':amt,'u':u,'i':i},0)
            if amt.strip() or u.strip() or i.strip():
                self.start_ing()
                if amt:
                    self.add_amt(amt)
                if u:
                    self.add_unit(u)
                # a trailing "optional" marks the ingredient as optional
                # and is removed from the item text
                optm=self.ing_opt_matcher.match(i)
                if optm:
                    item=optm.groups()[0]
                    self.ing['optional']=True
                else:
                    item = i
                self.add_item(item)
                debug("committing ing: %s"%self.ing,6)
                self.commit_ing()
    testtimer.end()
def do_run (self):
    """Count the recipes in the input, then run the SAX parse.

    ``self.fn`` may be a file name or an already open file object.  Lines
    containing ``self.recMarker`` are counted to give the handler its
    total, after which ``xml.sax.parse`` does the actual import and the
    added recipes/ingredients are copied over from the handler.
    """
    # count the recipes in the file
    t = TimeAction("rxml_to_metakit.run counting lines",0)
    opened_here = isinstance(self.fn, str)
    if opened_here:
        # open() replaces the Python 2-only file() builtin
        f=open(self.fn,'rb')
    else:
        f=self.fn
    recs = 0
    try:
        for l in f.readlines():
            if l.find(self.recMarker) >= 0: recs += 1
            if recs % 5 == 0: self.check_for_sleep()
    finally:
        # check_for_sleep() may raise; never leak a handle we opened.
        if opened_here:
            f.close()
    if not opened_here:
        # The same stream is handed to xml.sax.parse below, so it must
        # stay open and be read from the start again (the original closed
        # it here, which made the parse fail).
        f.seek(0)
    t.end()
    self.rh.total=recs
    self.parse = xml.sax.parse(self.fn, self.rh)
    self.added_ings = self.rh.added_ings
    self.added_recs = self.rh.added_recs
    importer.Importer._run_cleanup_(self.rh)
def start_rec (self, dict=None):
    """Begin a new recipe.

    ``dict``, when given and non-empty, becomes the recipe dictionary;
    otherwise an empty one is created.  A warning is printed if
    ingredients are still recorded in ``self.added_ings`` from before, and
    that list is then cleared.  May raise via ``check_for_sleep``.
    """
    self.rec_timer = TimeAction('importer RECIPE IMPORT',10)
    timeaction = TimeAction('importer.start_rec',10)
    self.check_for_sleep()
    if hasattr(self,'added_ings') and self.added_ings:
        # Single-argument print() yields the same text on Python 2 and 3.
        print('WARNING: starting new rec, but we have ingredients that we never added!')
        print('Unadded ingredients:  %s' % (self.added_ings,))
    self.added_ings=[]
    self.group = None
    if dict:
        self.rec=dict
    else:
        self.rec = {}
    timeaction.end()
def commit_rec(self):
    """Commit our recipe to our database.

    Does nothing when the recipe has already been committed.  Otherwise
    the collected instructions and modifications are unwrapped and stored
    on the recipe, the ingredient list is parsed, the source is attached
    when one was given, and the base importer commits the recipe.
    """
    testtimer = TimeAction('mealmaster_importer.commit_rec', 10)
    if self.committed:
        # end the timer on the early exit too
        testtimer.end()
        return
    debug("start _commit_rec", 5)
    self.instr = self.unwrap_lines(self.instr)
    self.mod = self.unwrap_lines(self.mod)
    self.rec['instructions'] = self.instr
    if self.mod:
        self.rec['modifications'] = self.mod
    self.parse_inglist()
    if self.source:
        self.rec['source'] = self.source
    importer.Importer.commit_rec(self)
    # blank rec
    self.committed = True
    self.in_variation = False
    testtimer.end()
def handle_group (self, groupm):
    """Start a new ingredient group.

    The group name is taken from the second group of the match, or from
    the third when the second is empty.  It is stripped and put into a
    more sane title case (MealMaster defaults to all caps) before being
    stored in ``self.group``.  When neither group holds a name,
    ``self.group`` is left untouched.
    """
    testtimer = TimeAction('mealmaster_importer.handle_group',10)
    debug("start handle_group",10)
    captured = groupm.groups()
    # This trace used to be a bare Python 2 print statement writing to
    # stdout on every group line; it now goes through debug().
    debug('handle_group %s %s'%(groupm,captured),10)
    name = captured[1] or captured[2]
    if name:
        self.group=name.strip().title()
    # The timer is ended on every path; the original returned early
    # without ending it when no name was found.
    testtimer.end()
def commit_rec (self):
    """Commit our recipe to our database.

    Does nothing when the recipe has already been committed.  Otherwise
    the collected instructions and modifications are unwrapped and stored
    on the recipe, the ingredient list is parsed, the source is attached
    when one was given, and the base importer commits the recipe.
    """
    testtimer = TimeAction('mealmaster_importer.commit_rec',10)
    if self.committed:
        # end the timer on the early exit too
        testtimer.end()
        return
    debug("start _commit_rec",5)
    self.instr = self.unwrap_lines(self.instr)
    self.mod = self.unwrap_lines(self.mod)
    self.rec['instructions']=self.instr
    if self.mod:
        self.rec['modifications']=self.mod
    self.parse_inglist()
    if self.source:
        self.rec['source']=self.source
    importer.Importer.commit_rec(self)
    # blank rec
    self.committed = True
    self.in_variation=False
    testtimer.end()
def do_run(self):
    """Count the recipes in the input, then run the SAX parse.

    ``self.fn`` may be a file name or an already open file object.  Lines
    containing ``self.recMarker`` are counted to give the handler its
    total, after which ``xml.sax.parse`` does the actual import and the
    added recipes/ingredients are copied over from the handler.
    """
    # count the recipes in the file
    t = TimeAction("rxml_to_metakit.run counting lines", 0)
    opened_here = isinstance(self.fn, str)
    if opened_here:
        # NOTE(review): in binary mode each line is bytes on Python 3, so
        # l.find() needs self.recMarker to be bytes as well -- confirm.
        f = open(self.fn, 'rb')
    else:
        f = self.fn
    recs = 0
    try:
        for l in f.readlines():
            if l.find(self.recMarker) >= 0: recs += 1
            if recs % 5 == 0: self.check_for_sleep()
    finally:
        # check_for_sleep() may raise; never leak a handle we opened.
        if opened_here:
            f.close()
    if not opened_here:
        # The same stream is handed to xml.sax.parse below, so it must
        # stay open and be read from the start again (the original closed
        # it here, which made the parse fail).
        f.seek(0)
    t.end()
    self.rh.total = recs
    self.parse = xml.sax.parse(self.fn, self.rh)
    self.added_ings = self.rh.added_ings
    self.added_recs = self.rh.added_recs
    importer.Importer._run_cleanup_(self.rh)
def find_ing_fields (self):
    """Find fields in an ingredient line.

    Returns a list of ``[amount_field, unit_field, item_field]`` entries:
    one for the first column of ingredients and, when a second column is
    detected to the right of ``self.two_col_minimum``, one for that column
    as well.
    """
    testtimer = TimeAction('mealmaster_importer.find_ing_fields',10)
    all_ings = [i[0] for i in self.ingrs]
    fields = find_fields(all_ings)
    fields_is_numfield = fields_match(all_ings,fields,self.amt_field_matcher)
    aindex,afield = self.find_amt_field(fields,fields_is_numfield)
    if aindex is not None:
        # everything up to and including the amount field is consumed
        fields = fields[aindex+1:]
        fields_is_numfield = fields_is_numfield[aindex+1:]
    ufield = fields and self.find_unit_field(fields,fields_is_numfield)
    if ufield:
        fields = fields[1:]
        fields_is_numfield = fields_is_numfield[1:]
    if fields:
        # the item runs from the next field to the end of the line
        ifield = [fields[0][0],None]
    else:
        ifield = 0,None
    retval = [[afield,ufield,ifield]]
    # A list is required here: the result is truth-tested, indexed and
    # sliced below, none of which a Python 3 filter() object supports.
    sec_col_fields = [x for x in fields if x[0]>self.two_col_minimum]
    if sec_col_fields:
        ibase = fields.index(sec_col_fields[0])
        # skip leading candidates that are not numeric
        while sec_col_fields and not fields_is_numfield[ibase]:
            ibase += 1
            sec_col_fields = sec_col_fields[1:]
        # if we might have a 2nd column...
        if sec_col_fields and len(sec_col_fields) > 2:
            fields_is_numfield = fields_is_numfield[ibase:]
            aindex2,afield2 = self.find_amt_field(sec_col_fields,fields_is_numfield)
            if aindex2 is not None and len(sec_col_fields[aindex2+1:]) >= 1:
                # then it's a go! Shift our first ifield
                retval[0][2]=[ifield[0],fields[ibase-1][1]]
                sec_col_fields = sec_col_fields[aindex2 + 1:]
                fields_is_numfield = fields_is_numfield[aindex2+1:]
                ufield2 = self.find_unit_field(sec_col_fields,fields_is_numfield)
                if ufield2:
                    sec_col_fields=sec_col_fields[1:]
                    fields_is_numfield = fields_is_numfield[1:]
                ifield2 = sec_col_fields[0][0],None
                retval.append([afield2,ufield2,ifield2])
    testtimer.end()
    return retval
def find_columns (strings, char=" "):
    """Return a list of character indices that match char for each string in strings.

    Candidate columns are the positions of ``char`` in the first string.
    A candidate is kept only if every string either has ``char`` at that
    index or is too short to reach it.  Returns ``None`` for an empty
    ``strings``.  The caller's list is not modified.
    """
    # The docstring used to follow the timer assignment, where it was a
    # discarded string expression rather than documentation.
    testtimer = TimeAction('mealmaster_importer.find_columns',10)
    debug("start find_columns",10)
    if not strings:
        testtimer.end()
        return None
    # The original called strings.sort() with a comparator that returned a
    # boolean.  It never returns a negative value, so on Python 2 the sort
    # never saw anything as "less than" and left the order unchanged;
    # Python 3 rejects positional comparators outright.  The given order
    # is therefore used as is.
    # we start with the columns in the first string
    columns = [match.start() for match in re.finditer(re.escape(char),strings[0])]
    if len(strings) > 1:
        # we eliminate all columns that aren't blank for every string
        columns = [c for c in columns
                   if all(c >= len(s) or s[c]==char for s in strings)]
    testtimer.end()
    return columns
def add_rec (self, rdict):
    """Add a recipe based on a dictionary of properties and values.

    ``rdict`` is updated in place: 'deleted' defaults to 0 and a missing
    'id' is filled in from ``self.new_id()``.  The row is appended to the
    recipe table, the add hooks (if any) are run on it, and the new row is
    returned.  Any error is logged through ``debug`` and re-raised.
    """
    self.changed=True
    t = TimeAction('rdatabase.add_rec - checking keys',3)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if 'deleted' not in rdict:
        rdict['deleted']=0
    if 'id' not in rdict:
        rdict['id']=self.new_id()
    t.end()
    try:
        debug('Adding recipe %s'%rdict, 4)
        t = TimeAction('rdatabase.add_rec - recipe_table.append(rdict)',3)
        self.recipe_table.append(rdict)
        t.end()
        debug('Running add hooks %s'%self.add_hooks,2)
        if self.add_hooks:
            self.run_hooks(self.add_hooks,self.recipe_table[-1])
        return self.recipe_table[-1]
    except Exception:
        debug("There was a problem adding recipe%s"%rdict,-1)
        raise
def do_run (self):
    """Count the recipes in the input, then run the SAX parse.

    ``self.fn`` may be a file name or an already open file object.  Lines
    containing ``self.recMarker`` are counted to give the handler its
    total, after which ``xml.sax.parse`` does the actual import and the
    added recipes/ingredients are copied over from the handler.
    """
    # count the recipes in the file
    t = TimeAction("rxml_to_metakit.run counting lines",0)
    opened_here = isinstance(self.fn, str)
    if opened_here:
        # Latin-1 can decode any bytes, letting us open ASCII-compatible
        # text files and sniff their contents - e.g. for XML tags -
        # without worrying about their real text encoding.
        f = open(self.fn, 'r', encoding='latin1')
    else:
        f=self.fn
    recs = 0
    try:
        for l in f.readlines():
            if l.find(self.recMarker) >= 0: recs += 1
            if recs % 5 == 0: self.check_for_sleep()
    finally:
        # check_for_sleep() may raise; never leak a handle we opened.
        if opened_here:
            f.close()
    if not opened_here:
        # The same stream is handed to xml.sax.parse below, so it must
        # stay open and be read from the start again (the original closed
        # it here, which made the parse fail).
        f.seek(0)
    t.end()
    self.rh.total=recs
    self.parse = xml.sax.parse(self.fn, self.rh)
    self.added_ings = self.rh.added_ings
    self.added_recs = self.rh.added_recs
    importer.Importer._run_cleanup_(self.rh)
def add_rec(self, rdict):
    """Add a recipe based on a dictionary of properties and values.

    ``rdict`` is updated in place: 'deleted' defaults to 0 and a missing
    'id' is filled in from ``self.new_id()``.  The row is appended to the
    recipe table, the add hooks (if any) are run on it, and the new row is
    returned.  Any error is logged through ``debug`` and re-raised.
    """
    self.changed = True
    t = TimeAction('rdatabase.add_rec - checking keys', 3)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if 'deleted' not in rdict:
        rdict['deleted'] = 0
    if 'id' not in rdict:
        rdict['id'] = self.new_id()
    t.end()
    try:
        debug('Adding recipe %s' % rdict, 4)
        t = TimeAction('rdatabase.add_rec - recipe_table.append(rdict)', 3)
        self.recipe_table.append(rdict)
        t.end()
        debug('Running add hooks %s' % self.add_hooks, 2)
        if self.add_hooks:
            self.run_hooks(self.add_hooks, self.recipe_table[-1])
        return self.recipe_table[-1]
    except Exception:
        debug("There was a problem adding recipe%s" % rdict, -1)
        raise
def find_fields(strings, char=" "):
    """Return the [start, end] spans of the fields shared by ``strings``.

    Fields are the stretches between the separator columns reported by
    ``find_columns``; a run of adjacent separator columns counts as one
    gap.  The last field ends at the length of the longest string.  An
    empty list is returned when there are no separator columns.
    """
    testtimer = TimeAction('mealmaster_importer.find_fields', 10)
    cols = find_columns(strings, char)
    if not cols:
        # end the timer on the early exit too
        testtimer.end()
        return []
    cols.reverse()
    fields = []
    # max() replaces sorting the result of map(), which is not a list on
    # Python 3 and has no sort() method there.
    end = max(len(s) for s in strings)
    last_col = end
    # walk the separators right to left, closing a field at each gap
    for col in cols:
        if col == last_col - 1:
            # adjacent to the previous separator: same gap, just extend it
            end = col
        else:
            fields.append([col + 1, end])
            end = col
        last_col = col
    if end != 0:
        fields.append([0, end])
    fields.reverse()
    testtimer.end()
    return fields
def find_fields (strings, char=" "):
    """Return the [start, end] spans of the fields shared by ``strings``.

    Fields are the stretches between the separator columns reported by
    ``find_columns``; a run of adjacent separator columns counts as one
    gap.  The last field ends at the length of the longest string.  An
    empty list is returned when there are no separator columns.
    """
    testtimer = TimeAction('mealmaster_importer.find_fields',10)
    cols = find_columns(strings, char)
    if not cols:
        # end the timer on the early exit too
        testtimer.end()
        return []
    cols.reverse()
    fields = []
    # max() replaces sorting the result of map(), which is not a list on
    # Python 3 and has no sort() method there.
    end = max(len(s) for s in strings)
    last_col = end
    # walk the separators right to left, closing a field at each gap
    for col in cols:
        if col == last_col - 1:
            # adjacent to the previous separator: same gap, just extend it
            end = col
        else:
            fields.append([col+1,end])
            end = col
        last_col = col
    if end != 0:
        fields.append([0,end])
    fields.reverse()
    testtimer.end()
    return fields
def find_columns(strings, char=" "):
    """Return a list of character indices that match char for each string in strings.

    Candidate columns are the positions of ``char`` in the first string.
    A candidate is kept only if every string either has ``char`` at that
    index or is too short to reach it.  Returns ``None`` for an empty
    ``strings``.  The caller's list is not modified.
    """
    # The docstring used to follow the timer assignment, where it was a
    # discarded string expression rather than documentation.
    testtimer = TimeAction('mealmaster_importer.find_columns', 10)
    debug("start find_columns", 10)
    if not strings:
        testtimer.end()
        return None
    # The original called strings.sort() with a comparator that returned a
    # boolean.  It never returns a negative value, so on Python 2 the sort
    # never saw anything as "less than" and left the order unchanged;
    # Python 3 rejects positional comparators outright.  The given order
    # is therefore used as is.
    # we start with the columns in the first string
    columns = [
        match.start() for match in re.finditer(re.escape(char), strings[0])
    ]
    if len(strings) > 1:
        # we eliminate all columns that aren't blank for every string
        columns = [
            c for c in columns
            if all(c >= len(s) or s[c] == char for s in strings)
        ]
    testtimer.end()
    return columns
def __init__ (self,rd,filename='Data/mealmaster.mmf',
              progress=None, source=None,threaded=True,
              two_col_minimum=38,conv=None):
    """Prepare a MealMaster import of ``filename``.

    filename is the file to parse.  rd is the recData instance to start
    with.  progress is a function we report our progress to (we hand it a
    single arg).  source is stored as ``self.source``.  two_col_minimum is
    stored for later use when looking for a second ingredient column.
    threaded and conv are passed on to the text importer.
    """
    testtimer = TimeAction('mealmaster_importer.__init__',10)
    debug("mmf_importer start __init__ ",5)
    # where the data comes from
    self.fn = filename
    self.source = source
    self.progress = progress
    # column-layout settings
    self.unit_length = 2
    self.two_col_minimum = two_col_minimum
    # parser state
    self.header = False
    self.ing_added = False
    self.in_variation = False
    self.last_line_was = None
    self.instr = ""
    self.ingrs = []
    plaintext_importer.TextImporter.__init__(
        self, filename, rd,
        progress=progress, threaded=threaded, conv=conv)
    testtimer.end()
def compile_regexps(self):
    """Build every regular expression needed for the rezkonv format.

    The plain-text importer's patterns are compiled first; the
    rezkonv-specific matchers are then stored as attributes on ``self``.
    """
    testtimer = TimeAction('mealmaster_importer.compile_regexps', 10)
    debug("start compile_regexps", 5)
    plaintext_importer.TextImporter.compile_regexps(self)
    ignore_case = re.IGNORECASE

    # recipe boundaries and ingredient-group headings
    self.start_matcher = re.compile(rzc_start_pattern)
    self.end_matcher = re.compile(r"^[=M-][=M-][=M-][=M-][=M-]\s*$")
    self.group_matcher = re.compile(
        r"^\s*([=M-][=M-][=M-][=M-][=M-]+)-*\s*([^-]+)\s*-*", ignore_case)

    # ingredient continuation / optional / "or" lines
    self.ing_cont_matcher = re.compile(r"^\s*[-;]")
    self.ing_opt_matcher = re.compile(r"(.+?)\s*\(?\s*optional\)?\s*$",
                                      ignore_case)
    # or or the German, oder
    self.ing_or_matcher = re.compile(r"^[-= ]*[Oo][dD]?[eE]?[Rr][-= ]*$",
                                     ignore_case)
    self.variation_matcher = re.compile(
        r"^\s*(VARIATION|HINT|NOTES?|VERÄNDERUNG|VARIANTEN|TIPANMERKUNGEN)(:.*)?",
        ignore_case)

    # a crude ingredient matcher -- we look for two numbers, intermingled with spaces
    # followed by a space or more, followed by a two digit unit (or spaces)
    number_parts = {
        'top': convert.DIVIDEND_REGEXP,
        'num': convert.NUMBER_REGEXP
    }
    self.ing_num_matcher = re.compile(
        r"^\s*%(top)s%(num)s+\s+[A-Za-z ][A-Za-z ]? .*" % number_parts,
        ignore_case)
    self.amt_field_matcher = convert.NUMBER_MATCHER

    # we build a regexp to match anything that looks like
    # this: ^\s*ATTRIBUTE: Some entry of some kind...$
    self.mmf = rzc
    alternatives = "".join(
        "%s|" % re.escape(k) for k in list(self.mmf.recattrs.keys()))
    opening = r"^\s*(" + alternatives
    # the final character (the trailing "|") is cut off before closing
    self.attr_matcher = re.compile(r"%s):\s*(.*)\s*$" % opening[0:-1])
    testtimer.end()