def __init__(self, *arg):
    """Initialise via the parent class, label rows and expose dataframes.

    All positional arguments are forwarded unchanged to the parent
    initializer.
    """
    # The parent initializer reads raw rows and the definition.
    super().__init__(*arg)
    # Label rows.
    self.label()
    # Allows calls like rs.data.annual_df().
    self.data = DictsAsDataframes(self.dicts())
    # Build the annual dataframe once and discard it: per the original
    # note, this call serves as a check for duplicates.
    self.data.annual_df()
class CoreRowSystem(InputDefinition):
    """Data structure and functions to manipulate raw data and the parsing
    specification.

    Main methods are:
        dicts()
        label()
        save()
    """

    def __init__(self, *arg):
        """Initialise via the parent class, label rows and expose dataframes.

        All positional arguments are forwarded unchanged to the parent
        initializer.
        """
        # The parent initializer reads raw rows and the definition.
        super().__init__(*arg)
        # Label rows.
        self.label()
        # Allows calls like rs.data.annual_df().
        self.data = DictsAsDataframes(self.dicts())
        # Build the annual dataframe once and discard it: per the original
        # note, this call serves as a check for duplicates.
        self.data.annual_df()

    def dicts(self):
        """Return whatever dicts_as_stream() produces for this object."""
        return dicts_as_stream(self)

    def named_dicts(self, name):
        """Yield the items of self.dicts() whose 'varname' equals *name*."""
        for item in self.dicts():
            if item['varname'] != name:
                continue
            yield item

    def get_header_and_desc_dicts(self):
        """Yield one {'_head', '_desc'} dict per header_dict entry of each segment.

        '_desc' is the header_dict key and '_head' is the first element of
        the corresponding value.
        """
        for segment in self.segments:
            for description, head_info in segment.header_dict.items():
                yield {"_head": head_info[0], "_desc": description}

    def save(self):
        """Save to the default database (delegates to save_as_default())."""
        self.save_as_default()

    def save_as_default(self):
        """Dump dicts to a DefaultDatabase, call self.toc() and return self."""
        default_db = DefaultDatabase()
        self.dump_dicts_to_db(db=default_db)
        self.toc()
        return self

    def save_as_test(self):
        """Dump dicts to a TrialDatabase and return self."""
        trial_db = TrialDatabase()
        self.dump_dicts_to_db(db=trial_db)
        return self

    def dump_dicts_to_db(self, db):
        """Write header/description dicts and then data dicts to *db*."""
        # WARNING: call order matters. Per the original note, calling
        # db.save_data_dicts() out of this order causes an error, so the
        # head label / description dicts are always saved first.
        head_desc_stream = self.get_header_and_desc_dicts()
        db.save_headlabel_description_dicts(gen=head_desc_stream)
        data_stream = self.dicts()
        db.save_data_dicts(gen=data_stream)

    def label(self):
        """Assign a parsing specification to every row, then label the rows."""
        self._assign_parsing_specification_by_row()
        self._run_label_adjuster()

    def _run_label_adjuster(self):
        """Label rows using markup from self.specs[i].header_dict and
        self.specs[i].unit_dict.

        Labels are stored in self.labels[i].
        """
        active_label = UnknownLabel()
        for idx, row_head in self.row_heads:
            # The active label only changes at text rows, i.e. rows whose
            # head is not a year; year rows inherit the label in effect.
            if not is_year(row_head):
                row_spec = self.specs[idx]
                active_label = adjust_labels(
                    textline=row_head,
                    incoming_label=active_label,
                    dict_headline=row_spec.header_dict,
                    dict_unit=row_spec.unit_dict,
                )
            self.labels[idx] = Label(active_label.head, active_label.unit)

    def _assign_parsing_specification_by_row(self):
        """Write the appropriate parsing specification to self.specs[i].

        With a single segment, every row gets self.segments[0]. Otherwise a
        _SegmentState built from self.segments is updated with each row head
        and its current_spec is stored for that row.
        """
        state = _SegmentState(self.segments)

        # No segment information: all rows share the default specification.
        if len(self.segments) == 1:
            for idx, _row_head in self.row_heads:
                self.specs[idx] = self.segments[0]
            return

        # Segment information is supplied: track the state row by row.
        for idx, row_head in self.row_heads:
            if state.in_segment:
                # Inside a custom spec: check whether this row leaves it.
                state.update_if_leaving_custom_segment(row_head)
            # In either case this row may start a (new) custom spec.
            state.update_if_entered_custom_segment(row_head)
            # Specification for the idx-th row is now settled.
            self.specs[idx] = state.current_spec