def display_stats(self, org_id):
    """Log a summary of tax-lot id quality for one organization.

    Counts jurisdiction tax lot ids that parse to a single field, to
    multiple fields (candidate m2m matches), or fail to parse.
    """
    org = Organization.objects.get(pk=org_id)
    logging_info(
        "########## PROCESSING ORGANIZATION {} - {} #################".format(
            org.id, org.name))

    singleton_count = 0
    malformed_count = 0
    multiple_count = 0
    invalid_strings = []

    base_query = TaxLotProperty.objects.filter(
        property_view__property__organization=org)

    for m2m in base_query.all():
        tax_lot_id = m2m.taxlot_view.state.jurisdiction_tax_lot_id
        try:
            fields = get_id_fields(tax_lot_id)
            if len(fields) > 1:
                logging_info("Possible match: {}".format(tax_lot_id))
                multiple_count += 1
            else:
                singleton_count += 1
        # Fix: 'except X, e' is Python-2-only syntax; 'as' works on 2.6+/3.x.
        except TaxLotIDValueError as e:
            malformed_count += 1
            invalid_strings.append(e.original_string)

    # Fix: the accumulated counters were computed but never reported,
    # which defeated the purpose of display_stats().
    logging_info(
        "Org {}: {} single, {} multiple, {} malformed tax lot ids".format(
            org.id, singleton_count, multiple_count, malformed_count))
    if invalid_strings:
        logging_info("Invalid tax lot id strings: {}".format(invalid_strings))
def handle(self, *args, **options):
    """Update and/or create Column records for each organization's
    extra data, driven by the command-line options.
    """
    logging_info(
        "RUN migrate_extradata_columns with args={},kwds={}".format(
            args, options))

    if options['organization']:
        organization_ids = map(int, options['organization'].split(","))
    else:
        organization_ids = get_core_organizations()

    update_columns = options['update_columns']
    create_missing_columns = options['create_missing_columns']
    # Fix: options['add_unmapped_columns'] was read into a local that was
    # never used; the dead assignment has been removed.

    for org_id in organization_ids:
        org = Organization.objects.get(pk=org_id)
        # Update
        if update_columns:
            self.update_columns_based_on_mappings_file(org)
        if create_missing_columns:
            self.find_missing_columns_based_on_extra_data(org)

    logging_info("END migrate_extradata_columns")
    return
def handle(self, *args, **options):
    """Split tax lots with multi-part ids into m2m relationships, per org."""
    logging_info(
        "RUN create_m2m_relatinships_organization with args={},kwds={}".format(
            args, options))

    if options['organization']:
        core_organization = map(int, options['organization'].split(","))
    else:
        core_organization = get_core_organizations()

    logging_info("Processing organization list: {}".format(core_organization))

    if options['stats']:
        for org_id in core_organization:
            self.display_stats(org_id)
        return

    # Default rule: split on the unique list of jurisdiction ids; org 20
    # additionally keys on its extra-data building id.
    org_taxlot_splitdata_rules = collections.defaultdict(
        lambda: (UNIQUE_LIST, ("jurisdiction_tax_lot_id",)))
    org_taxlot_splitdata_rules[20] = (UNIQUE_LIST, (
        "jurisdiction_tax_lot_id",
        "extra_data/Philadelphia Building ID",
    ))

    for org_id in core_organization:
        self.split_taxlots_into_m2m_relationships(
            org_id, org_taxlot_splitdata_rules)

        # At the end run two checks:
        # Go through the tax Lots, collect any that are left, make
        # sure they aren't a part of any m2m entities.
        for view in TaxLotView.objects.filter(
                taxlot__organization_id=org_id).all():
            try:
                taxlot_field_list = get_id_fields(
                    view.state.jurisdiction_tax_lot_id)
                if len(taxlot_field_list) > 1:
                    logging_warn("Danger - tax lot '{}' still exists.".format(
                        view.state.jurisdiction_tax_lot_id))
            # Fix: 'except X, e' is Python-2-only syntax, and the bound
            # exception was never used.
            except TaxLotIDValueError:
                continue
def handle(self, *args, **options):
    """Update and/or create Column records for each organization's
    extra data, driven by the command-line options.
    """
    logging_info(
        "RUN migrate_extradata_columns with args={},kwds={}".format(
            args, options))

    if options['organization']:
        organization_ids = map(int, options['organization'].split(","))
    else:
        organization_ids = get_core_organizations()

    update_columns = options['update_columns']
    create_missing_columns = options['create_missing_columns']
    # Fix: options['add_unmapped_columns'] was read into a local that was
    # never used; the dead assignment has been removed.

    for org_id in organization_ids:
        org = Organization.objects.get(pk=org_id)
        # Update
        if update_columns:
            self.update_columns_based_on_mappings_file(org)
        if create_missing_columns:
            self.find_missing_columns_based_on_extra_data(org)

    logging_info("END migrate_extradata_columns")
    return
def update_columns_based_on_mappings_file(self, org):
    """Go through each of the organization columns as reported by the
    'extradata.csv' mappings file and make sure it points to the table
    specified in that file.
    """
    logging_info(
        "Updating columns for org {} to match that in migration mapping file."
        .format(org))

    taxlot_column_names = get_taxlot_columns(org)
    property_column_names = get_property_columns(org)
    # Fix: the unused 'found'/'notfound' counters were removed.

    for prop_col in property_column_names:
        qry = Column.objects.filter(organization=org, column_name=prop_col)
        if qry.count():
            # Update the existing column to point at the property table.
            col = qry.first()
            logging_info("Setting Column '{}' to SOURCE_PROPERTY".format(col))
            col.extra_data_source = Column.SOURCE_PROPERTY
            col.save()
        else:
            col = Column(organization=org,
                         column_name=prop_col,
                         is_extra_data=True,
                         table_name="PropertyState")
            logging_info(
                "Creating Column '{}' based on missing from mappings file".
                format(prop_col))
            col.save()

    for tl_col in taxlot_column_names:
        qry = Column.objects.filter(organization=org, column_name=tl_col)
        if qry.count():
            # Update the existing column to point at the tax lot table.
            col = qry.first()
            col.extra_data_source = Column.SOURCE_TAXLOT
            logging_info("Setting Column '{}' to SOURCE_TAXLOT".format(col))
            col.save()
        else:
            col = Column(organization=org,
                         column_name=tl_col,
                         is_extra_data=True,
                         table_name="TaxLotState")
            logging_info(
                "Creating Column '{}' based on missing from mappings file".
                format(tl_col))
            col.save()
    return
def find_missing_columns_based_on_extra_data(self, org):
    """Look through all the extra_data fields of the TaxLot and Property
    State objects and make sure there are columns that point to them.
    """
    logging_info(
        "Creating any columns for non-mapped extra data fields for organization {}"
        .format(org))

    property_states = PropertyState.objects.filter(organization=org).all()
    taxlot_states = TaxLotState.objects.filter(organization=org).all()

    # Collect the union of extra_data keys across all states of each type.
    # (PEP 8: use def rather than assigning a lambda to a name.)
    def get_ed_keys(state):
        return state.extra_data.keys()

    property_keys = set(
        itertools.chain.from_iterable(map(get_ed_keys, property_states)))
    taxlot_keys = set(
        itertools.chain.from_iterable(map(get_ed_keys, taxlot_states)))

    # Iterate through each of the extra data fields associated
    # with the org's PropertyState objects and check to make sure
    # there is Column with that key name.
    for key in property_keys:
        if not Column.objects.filter(organization=org,
                                     column_name=key).count():
            logging_info(
                "Missing column '{}' found in PropertyState extra_data keys"
                .format(key))
            logging_info("Creating missing column '{}'".format(key))
            col = Column(organization=org,
                         column_name=key,
                         is_extra_data=True,
                         table_name="PropertyState")
            col.save()

    # Iterate through each of the extra data fields associated with the TaxLotStates
    for key in taxlot_keys:
        if not Column.objects.filter(organization=org,
                                     column_name=key).count():
            logging_info(
                "Missing column '{}' found in TaxLotState extra_data keys."
                .format(key))
            logging_info("Creating missing column '{}'".format(key))
            col = Column(organization=org,
                         column_name=key,
                         is_extra_data=True,
                         table_name="TaxLotState")
            col.save()
    return
def update_columns_based_on_mappings_file(self, org):
    """Go through each of the organization columns as reported by the
    'extradata.csv' mappings file and make sure it points to the table
    specified in that file.
    """
    logging_info(
        "Updating columns for org {} to match that in migration mapping file."
        .format(org))

    taxlot_column_names = get_taxlot_columns(org)
    property_column_names = get_property_columns(org)
    # Fix: the unused 'found'/'notfound' counters were removed.

    for prop_col in property_column_names:
        qry = Column.objects.filter(organization=org, column_name=prop_col)
        if qry.count():
            # Update the existing column to point at the property table.
            col = qry.first()
            logging_info("Setting Column '{}' to SOURCE_PROPERTY".format(col))
            col.extra_data_source = Column.SOURCE_PROPERTY
            col.save()
        else:
            col = Column(organization=org,
                         column_name=prop_col,
                         is_extra_data=True,
                         table_name="PropertyState")
            logging_info(
                "Creating Column '{}' based on missing from mappings file".
                format(prop_col))
            col.save()

    for tl_col in taxlot_column_names:
        qry = Column.objects.filter(organization=org, column_name=tl_col)
        if qry.count():
            # Update the existing column to point at the tax lot table.
            col = qry.first()
            col.extra_data_source = Column.SOURCE_TAXLOT
            logging_info("Setting Column '{}' to SOURCE_TAXLOT".format(col))
            col.save()
        else:
            col = Column(organization=org,
                         column_name=tl_col,
                         is_extra_data=True,
                         table_name="TaxLotState")
            logging_info(
                "Creating Column '{}' based on missing from mappings file".
                format(tl_col))
            col.save()
    return
def assign_primarysecondary_tax_lots(self, org):
    """Demote all but the first linked tax lot of each property view.

    Links are ordered by jurisdiction_tax_lot_id; the first remains
    primary and every subsequent link is marked primary=False.
    """
    property_views = PropertyView.objects.filter(
        property__organization=org).all()
    for property_view in property_views:
        logging_info(
            "assign_primarysecondary_tax_lots for property {p}".format(
                p=property_view.state.pm_property_id))

        found_ct = TaxLotProperty.objects.filter(
            property_view=property_view).count()
        logging_info("Found {ct} TaxLotProperty".format(ct=found_ct))
        if found_ct <= 1:
            # Zero or one link: nothing to demote.
            continue

        links = list(
            TaxLotProperty.objects.filter(property_view=property_view).
            order_by('taxlot_view__state__jurisdiction_tax_lot_id').all())
        logging_info(
            "Found {ct} linked TaxLotProperties".format(ct=len(links)))

        # Every link after the first becomes secondary.
        for link in links[1:]:
            # NOTE(review): the '{s}' value logs the property's own
            # pm_property_id rather than the linked tax lot's id —
            # confirm this is intended.
            logging_info(
                "Setting secondary for property {p} for cycle {c}: {s}".
                format(p=property_view.state.pm_property_id,
                       c=property_view.cycle.name,
                       s=link.property_view.state.pm_property_id))
            link.primary = False
            link.save()
    return
def find_missing_columns_based_on_extra_data(self, org):
    """Look through all the extra_data fields of the TaxLot and Property
    State objects and make sure there are columns that point to them.
    """
    logging_info(
        "Creating any columns for non-mapped extra data fields for organization {}"
        .format(org))

    property_states = PropertyState.objects.filter(organization=org).all()
    taxlot_states = TaxLotState.objects.filter(organization=org).all()

    # Collect the union of extra_data keys across all states of each type.
    # (PEP 8: use def rather than assigning a lambda to a name.)
    def get_ed_keys(state):
        return state.extra_data.keys()

    property_keys = set(
        itertools.chain.from_iterable(map(get_ed_keys, property_states)))
    taxlot_keys = set(
        itertools.chain.from_iterable(map(get_ed_keys, taxlot_states)))

    # Iterate through each of the extra data fields associated
    # with the org's PropertyState objects and check to make sure
    # there is Column with that key name.
    for key in property_keys:
        if not Column.objects.filter(organization=org,
                                     column_name=key).count():
            logging_info(
                "Missing column '{}' found in PropertyState extra_data keys"
                .format(key))
            logging_info("Creating missing column '{}'".format(key))
            col = Column(organization=org,
                         column_name=key,
                         is_extra_data=True,
                         table_name="PropertyState")
            col.save()

    # Iterate through each of the extra data fields associated with the TaxLotStates
    for key in taxlot_keys:
        if not Column.objects.filter(organization=org,
                                     column_name=key).count():
            logging_info(
                "Missing column '{}' found in TaxLotState extra_data keys."
                .format(key))
            logging_info("Creating missing column '{}'".format(key))
            col = Column(organization=org,
                         column_name=key,
                         is_extra_data=True,
                         table_name="TaxLotState")
            col.save()
    return
def handle(self, *args, **options):
    """Split tax lots with multi-part ids into m2m relationships, per org."""
    logging_info(
        "RUN create_m2m_relatinships_organization with args={},kwds={}".format(
            args, options))

    if options['organization']:
        core_organization = map(int, options['organization'].split(","))
    else:
        core_organization = get_core_organizations()

    logging_info("Processing organization list: {}".format(core_organization))

    if options['stats']:
        for org_id in core_organization:
            self.display_stats(org_id)
        return

    # Default rule: split on the unique list of jurisdiction ids; org 20
    # additionally keys on its extra-data building id.
    org_taxlot_splitdata_rules = collections.defaultdict(
        lambda: (UNIQUE_LIST, ("jurisdiction_tax_lot_id",)))
    org_taxlot_splitdata_rules[20] = (UNIQUE_LIST, (
        "jurisdiction_tax_lot_id",
        "extra_data/Philadelphia Building ID",
    ))

    for org_id in core_organization:
        self.split_taxlots_into_m2m_relationships(
            org_id, org_taxlot_splitdata_rules)

        # At the end run two checks:
        # Go through the tax Lots, collect any that are left, make
        # sure they are not a part of any m2m entities.
        for view in TaxLotView.objects.filter(
                taxlot__organization_id=org_id).all():
            try:
                taxlot_field_list = get_id_fields(
                    view.state.jurisdiction_tax_lot_id)
                if len(taxlot_field_list) > 1:
                    logging_warn("Danger - tax lot '{}' still exists.".format(
                        view.state.jurisdiction_tax_lot_id))
            # Fix: 'except X, e' is Python-2-only syntax, and the bound
            # exception was never used.
            except TaxLotIDValueError:
                continue
def assign_primarysecondary_tax_lots(self, org):
    """Demote all but the first linked tax lot of each property view.

    Links are ordered by jurisdiction_tax_lot_id; the first remains
    primary and every subsequent link is marked primary=False.
    """
    property_views = PropertyView.objects.filter(
        property__organization=org).all()
    for property_view in property_views:
        logging_info(
            "assign_primarysecondary_tax_lots for property {p}".format(
                p=property_view.state.pm_property_id))

        found_ct = TaxLotProperty.objects.filter(
            property_view=property_view).count()
        logging_info("Found {ct} TaxLotProperty".format(ct=found_ct))
        if found_ct <= 1:
            # Zero or one link: nothing to demote.
            continue

        links = list(
            TaxLotProperty.objects.filter(property_view=property_view).
            order_by('taxlot_view__state__jurisdiction_tax_lot_id').all())
        logging_info(
            "Found {ct} linked TaxLotProperties".format(ct=len(links)))

        # Every link after the first becomes secondary.
        for link in links[1:]:
            # NOTE(review): the '{s}' value logs the property's own
            # pm_property_id rather than the linked tax lot's id —
            # confirm this is intended.
            logging_info(
                "Setting secondary for property {p} for cycle {c}: {s}".
                format(p=property_view.state.pm_property_id,
                       c=property_view.cycle.name,
                       s=link.property_view.state.pm_property_id))
            link.primary = False
            link.save()
    return
def display_stats(self, org_id):
    """Log a summary of tax-lot id quality for one organization.

    Counts jurisdiction tax lot ids that parse to a single field, to
    multiple fields (candidate m2m matches), or fail to parse.
    """
    org = Organization.objects.get(pk=org_id)
    logging_info(
        "########## PROCESSING ORGANIZATION {} - {} #################".format(
            org.id, org.name))

    singleton_count = 0
    malformed_count = 0
    multiple_count = 0
    invalid_strings = []

    base_query = TaxLotProperty.objects.filter(
        property_view__property__organization=org)

    for m2m in base_query.all():
        tax_lot_id = m2m.taxlot_view.state.jurisdiction_tax_lot_id
        try:
            fields = get_id_fields(tax_lot_id)
            if len(fields) > 1:
                logging_info("Possible match: {}".format(tax_lot_id))
                multiple_count += 1
            else:
                singleton_count += 1
        # Fix: 'except X, e' is Python-2-only syntax; 'as' works on 2.6+/3.x.
        except TaxLotIDValueError as e:
            malformed_count += 1
            invalid_strings.append(e.original_string)

    # Fix: the accumulated counters were computed but never reported,
    # which defeated the purpose of display_stats().
    logging_info(
        "Org {}: {} single, {} multiple, {} malformed tax lot ids".format(
            org.id, singleton_count, multiple_count, malformed_count))
    if invalid_strings:
        logging_info("Invalid tax lot id strings: {}".format(invalid_strings))
def handle(self, *args, **options):
    """Run the per-organization special-case processing."""
    logging_info(
        "RUN org_specific_commands with args={},kwds={}".format(
            args, options))

    # Fall back to the two hard-coded organizations when none are given.
    if options['organization']:
        core_organization = [
            int(org_str) for org_str in options['organization'].split(",")
        ]
    else:
        core_organization = [20, 69]

    logging_info(
        "Processing organization list: {}".format(core_organization))

    for org in core_organization:
        process_org(org)

    logging_info("END org_specific_commands")
    return
def handle(self, *args, **options):
    """Delete migrated data for each requested organization."""
    # NOTE(review): the message names 'create_m2m_relatinships_organization'
    # although this command deletes org data; kept verbatim.
    logging_info(
        "RUN create_m2m_relatinships_organization with args={},kwds={}".format(
            args, options))

    # Fall back to the two hard-coded organizations when none are given.
    if options['organization']:
        core_organization = [
            int(org_str) for org_str in options['organization'].split(",")
        ]
    else:
        core_organization = [20, 69]

    logging_info(
        "Processing organization list: {}".format(core_organization))

    for org in core_organization:
        delete_data_from_org(org)

    logging_info("END create_m2m_relatinships_organization")
    return
def handle(self, *args, **options):
    """Go through organization by organization and look for m2m."""
    logging_info(
        "RUN create_primarysecondary_taxlots with args={},kwds={}".format(
            args, options))

    if options['organization']:
        core_organization = [
            int(org_str) for org_str in options['organization'].split(",")
        ]
    else:
        core_organization = get_core_organizations()

    # Writing loop over organizations
    for org_id in core_organization:
        org = Organization.objects.filter(pk=org_id).first()
        logging_info("Processing organization {}".format(org))
        assert org, "Organization {} not found".format(org_id)
        self.assign_primarysecondary_tax_lots(org)

    logging_info("END create_primarysecondary_taxlots")
    return
def handle(self, *args, **options):
    """Go through organization by organization and look for m2m."""
    logging_info(
        "RUN create_primarysecondary_taxlots with args={},kwds={}".format(
            args, options))

    if options['organization']:
        core_organization = [
            int(org_str) for org_str in options['organization'].split(",")
        ]
    else:
        core_organization = get_core_organizations()

    # Writing loop over organizations
    for org_id in core_organization:
        org = Organization.objects.filter(pk=org_id).first()
        logging_info("Processing organization {}".format(org))
        assert org, "Organization {} not found".format(org_id)
        self.assign_primarysecondary_tax_lots(org)

    logging_info("END create_primarysecondary_taxlots")
    return
taxlot_id_list = get_id_fields(jurisdiction_tax_lot_id) logging_debug( "Found taxlot_id_list with {ct} items. {l}".format( ct=len(taxlot_id_list), l=taxlot_id_list)) if len(taxlot_id_list) <= 1: continue assert TaxLotProperty.objects.filter( taxlot_view=original_taxlot_view).count( ) == 0, "Tax Lot should have been broken up already." if TaxLotProperty.objects.filter( taxlot_view=original_taxlot_view).count() != 0: logging_debug( "Tax Lot should have been broken up already.") # Some have duplicates for taxlot_id in set(taxlot_id_list): logging_info( "Break up tax lot {} to {} for cycle {}".format( jurisdiction_tax_lot_id, taxlot_id_list, m2m.cycle)) # Take tax lot and create a taxlot, a taxlot view, and a taxlot state. # taxlot state, and an m2m for the view and installs each. matching_views_qry = TaxLotView.objects.filter( taxlot__organization=org, state__jurisdiction_tax_lot_id=taxlot_id) matching_views_ct = matching_views_qry.count() logging_debug("Found {ct} matching views".format( ct=matching_views_ct)) if matching_views_ct: taxlot = matching_views_qry.first().taxlot taxlotview_ct = TaxLotView.objects.filter( taxlot=taxlot, cycle=original_taxlot_view.cycle).count()
def handle(self, *args, **options):
    """Create campus (parent/child) relationships between properties.

    A property whose pm_parent_property_id equals its own pm_property_id
    is marked as a campus; otherwise the parent property is found or
    created and linked as the child's parent.
    """
    logging_info(
        "RUN create_campus_relationships_organization with args={},kwds={}".format(
            args, options))

    if options['organization']:
        core_organization = map(int, options['organization'].split(","))
    else:
        core_organization = get_core_organizations()

    for org_id in core_organization:
        # Writing loop over organizations
        org = Organization.objects.filter(pk=org_id).first()
        # Consistency fix: use the module's logging_info helper like the
        # rest of this file (was logging.info).
        logging_info("Processing organization {}".format(org))
        assert org, "Organization {} not found".format(org_id)

        # All property views where the state has a parent property that
        # isn't "Not Applicable."
        property_views = PropertyView.objects.filter(cycle__organization=org) \
            .exclude(state__pm_parent_property_id=None) \
            .exclude(state__pm_parent_property_id="Not Applicable: Standalone Property") \
            .all()
        property_views = list(property_views)
        property_views.sort(key=lambda pv: pv.cycle.start)
        states = map(lambda pv: pv.state, list(property_views))

        for (pv, state) in zip(property_views, states):
            pm_parent_property_id = state.pm_parent_property_id
            # What is the difference between these two fields?
            if pm_parent_property_id == state.pm_property_id:
                logging_info(
                    "Auto reference: site id={}/pm_property_id={} is it's own campus (pm_parent_property_id={})".format(
                        pv.property.id, state.pm_property_id,
                        state.pm_parent_property_id))
                prop = pv.property
                prop.campus = True
                prop.save()
                continue

            parent_property = find_property_associated_with_portfolio_manager_id(
                pm_parent_property_id)
            if not parent_property:
                logging_info(
                    "Could not find parent property with pm_property_id={}. Creating new Property/View/State for it.".format(
                        pm_parent_property_id))
                parent_property = Property(organization_id=org_id)
                parent_property.campus = True
                parent_property.save()
                # Create a view and a state for the active cycle.
                parent_property_state = PropertyState(
                    organization=org,
                    pm_property_id=pm_parent_property_id,
                    pm_parent_property_id=pm_parent_property_id,
                    property_notes="Created by campus relations migration on {}".format(
                        datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
                parent_property_state.save()
                parent_property_view = PropertyView(
                    property=parent_property,
                    cycle=pv.cycle,
                    state=parent_property_state)
                parent_property_view.save()

                # Bug fix: the original chained assignment
                #   child_property = child_property.parent_property = parent_property
                # rebinds the local name to the parent BEFORE the attribute
                # assignment, so the attribute was set on the parent itself
                # and the child never got its parent; save() then re-saved
                # the parent. Link the child exactly as the else-branch does.
                child_property = pv.property
                child_property.parent_property = parent_property
                child_property.save()
            else:
                logging_info(
                    "Found property matching pm_parent_property_id={}".format(
                        pm_parent_property_id))
                parent_property.campus = True
                parent_property.save()
                child_property = pv.property
                child_property.parent_property = parent_property
                child_property.save()

                # Make sure the parent has a view for the same
                # cycle as the pv in question.
                if not PropertyView.objects.filter(property=parent_property,
                                                   cycle=pv.cycle).count():
                    parent_views = list(
                        PropertyView.objects.filter(
                            property=parent_property).all())
                    parent_views.sort(key=lambda ppv: ppv.cycle.start)
                    # parent_views = [ppv for ppv in parent_views if ppv.cycle.start <= pv.cycle.start]
                    assert len(parent_views), "This should always be true."
                    # Clone the parent's most recent state into this cycle.
                    ps = parent_views[-1].state
                    ps.pk = None
                    ps.save()
                    parent_view = PropertyView(property=parent_property,
                                               cycle=pv.cycle,
                                               state=ps)
                    parent_view.save()

    logging_info("END create_campus_relationships_organization")
    return
def create_associated_bluesky_taxlots_properties(org, import_buildingsnapshots,
                                                 leaf_building,
                                                 other_buildingsnapshots,
                                                 child_dictionary,
                                                 parent_dictionary, adj_matrix,
                                                 cb):
    """Take tree structure describing a single Property/TaxLot over time and
    create the entities."""
    logging_info("Populating new blue sky entities for canonical snapshot tree!")

    # Tallies reported at the end of the run.
    tax_lot_created = 0
    property_created = 0
    tax_lot_view_created = 0
    property_view_created = 0
    tax_lot_state_created = 0
    property_state_created = 0
    m2m_created = 0

    logging_info("Creating Property/TaxLot from {} nodes".format(
        sum(map(len, ([leaf_building], other_buildingsnapshots,
                      import_buildingsnapshots)))))

    # Create the TaxLot and/or Property entity for the tree's leaf node.
    tax_lot = None
    property_obj = None
    if node_has_tax_lot_info(leaf_building, org):
        tax_lot, created = find_or_create_bluesky_taxlot_associated_with_building_snapshot(
            leaf_building, org)
        # tax_lot = seed.models.TaxLot(organization=org)
        tax_lot.save()
        tax_lot_created += int(created)
    if node_has_property_info(leaf_building, org):
        property_obj, created = find_or_create_bluesky_property_associated_with_building_snapshot(
            leaf_building, org)
        # property_obj = seed.models.Property(organization=org)
        property_obj.save()
        property_created += int(created)
    if not property_obj and not tax_lot:
        property_obj = seed.models.Property(organization=org)
        property_obj.save()
        property_created += 1

    # Most recent view created for each cycle; False means "none yet".
    last_taxlot_view = collections.defaultdict(lambda: False)
    last_property_view = collections.defaultdict(lambda: False)

    # HOHO: TODO: The original code had these in reverse creation
    # order but that definitely seems wrong.
    all_nodes = list(
        itertools.chain(import_buildingsnapshots, [leaf_building],
                        other_buildingsnapshots))
    all_nodes.sort(key=lambda rec: rec.created)  # Sort from first to last
    # all_nodes = list(reversed(all_nodes)) # FIXME: Test this thoroughly.

    for node in all_nodes:
        node_type = classify_node(node, org)

        if node_type == TAX_IMPORT or node_type == COMBO_IMPORT:
            # Get the cycle associated with the node
            import_cycle = load_cycle(org, node)
            tax_lot_state = create_tax_lot_state_for_node(node, org, cb)
            tax_lot_state_created += 1
            query = seed.models.TaxLotView.objects.filter(
                taxlot=tax_lot, cycle=import_cycle)
            if query.count():
                taxlotview = query.first()
                taxlotview.update_state(
                    tax_lot_state, name="Merge current state in migration")
                taxlotview.save()
            else:
                taxlotview, created = seed.models.TaxLotView.objects.get_or_create(
                    taxlot=tax_lot, cycle=import_cycle, state=tax_lot_state)
                tax_lot_view_created += int(created)
                assert created, "Should have created a tax lot."
                taxlotview.save()
            last_taxlot_view[taxlotview.cycle] = taxlotview
        # NOTE(review): COMBO_IMPORT is swallowed by the branch above, so
        # this elif never fires for it; a later revision of this function
        # in this file uses independent ifs — confirm which is intended.
        elif node_type == PROPERTY_IMPORT or node_type == COMBO_IMPORT:
            import_cycle = load_cycle(org, node)
            property_state = create_property_state_for_node(node, org, cb)
            property_state_created += 1
            query = seed.models.PropertyView.objects.filter(
                property=property_obj, cycle=import_cycle)
            if query.count():
                propertyview = query.first()
                propertyview.update_state(
                    property_state, name="Merge current state in migration")
                propertyview.save()
            else:
                propertyview, created = seed.models.PropertyView.objects.get_or_create(
                    property=property_obj, cycle=import_cycle,
                    state=property_state)
                assert created, "Should have created something"
                property_view_created += int(created)
                propertyview.save()
            last_property_view[propertyview.cycle] = propertyview

        if node_type == MERGE or node_type == COMBO_IMPORT:
            m2m_cycle = load_cycle(org, node)
            if last_property_view[m2m_cycle] and last_taxlot_view[m2m_cycle]:
                if node_type == MERGE:
                    # Check to make sure the last stuff created is
                    # associated with the same cycle as the merge.
                    assert last_property_view[m2m_cycle], "Didn't expect NO proeprty view"
                    assert last_taxlot_view[m2m_cycle], "Didn't expect NO tax lot view"
                # # FIXME - bad logic
                # if m2m_cycle != last_property_view[cycle].cycle:
                #     # Ultimately Copy the state over to a new state
                #     last_property_view, _ = seed.models.PropertyView.objects.get_or_create(property=property_obj, cycle=m2m_cycle, state=last_property_view.state)
                # if m2m_cycle != last_taxlot_view.cycle:
                #     last_taxlot_view, _ = seed.models.TaxLotView.objects.get_or_create(taxlot=tax_lot, cycle=m2m_cycle, state=last_taxlot_view.state)
                # assert m2m_cycle == last_taxlot_view.cycle == last_property_view.cycle, "Why aren't all these equal?!"
                tlp, created = seed.models.TaxLotProperty.objects.get_or_create(
                    property_view=last_property_view[m2m_cycle],
                    taxlot_view=last_taxlot_view[m2m_cycle],
                    cycle=m2m_cycle)
                m2m_created += int(created)
        else:
            import_cycle = load_cycle(org, node)
            # Treat it like an import.
            if node_has_tax_lot_info(node, org):
                tax_lot_state = create_tax_lot_state_for_node(node, org, cb)
                tax_lot_state_created += 1
                # Check if there is a TaxLotView Present
                taxlotview, created = seed.models.TaxLotView.objects.update_or_create(
                    taxlot=tax_lot, cycle=import_cycle,
                    defaults={"state": tax_lot_state})
                tax_lot_view_created += int(created)
                taxlotview.save()
                last_taxlot_view[taxlotview.cycle] = taxlotview
            if node_has_property_info(node, org):
                property_state = create_property_state_for_node(node, org, cb)
                property_state_created += 1
                propertyview, created = seed.models.PropertyView.objects.update_or_create(
                    property=property_obj, cycle=import_cycle,
                    defaults={"state": property_state})
                property_view_created += int(created)
                propertyview.save()
                last_property_view[propertyview.cycle] = propertyview
            if node_has_tax_lot_info(node, org) and node_has_property_info(node, org):
                _, created = seed.models.TaxLotProperty.objects.get_or_create(
                    property_view=last_property_view[import_cycle],
                    taxlot_view=last_taxlot_view[import_cycle],
                    cycle=import_cycle)
                m2m_created += int(created)

    logging_info(
        "{} Tax Lot, {} Property, {} TaxLotView, {} PropertyView, {} TaxLotState, {} PropertyState, {} m2m created.".format(
            tax_lot_created, property_created, tax_lot_view_created,
            property_view_created, tax_lot_state_created,
            property_state_created, m2m_created))
    return
def handle(self, *args, **options):
    """Migrate the CanonicalBuildings for one or more Organizations into
    the new 'BlueSky' data structures."""
    logging_info("RUN migrate_organization with args={},kwds={}".format(
        args, options))

    # Process Arguments
    if options['organization']:
        core_organization = map(int, options['organization'].split(","))
    else:
        core_organization = get_core_organizations()

    limit = options['limit'] if "limit" in options else 0
    starting_from_canonical = False if not options[
        'starting_on_canonical'] else options['starting_on_canonical']
    starting_on_canonical_following = False if not options['starting_following_canonical'] else \
        options['starting_following_canonical']
    ADD_METADATA = options['add_metadata']  # NOTE(review): read but unused here
    assert not (
        starting_on_canonical_following and starting_from_canonical
    ), "Can only specify one of --starting_on_canonical and --starting_following_canonical"
    canonical_buildings_whitelist = map(int, options['cb_whitelist_string'].split(",")) if \
        options['cb_whitelist_string'] else False
    # End Processing

    # Build the parent/child adjacency structure of building snapshots.
    tree_file = get_static_building_snapshot_tree_file()
    m2m = read_building_snapshot_tree_structure(tree_file)

    all_nodes = set(map(projection_onto_index(0), m2m)).union(
        set(map(projection_onto_index(1), m2m)))

    child_dictionary = collections.defaultdict(lambda: set())
    parent_dictionary = collections.defaultdict(lambda: set())

    adj_dim = max(all_nodes) + 1
    # Fix: np.bool is a deprecated (now removed) alias of the builtin bool.
    adj_matrix = dok_matrix((adj_dim, adj_dim), dtype=bool)

    for (from_node, to_node) in m2m:
        adj_matrix[from_node, to_node] = 1
        child_dictionary[from_node].add(to_node)
        parent_dictionary[to_node].add(from_node)

    # Core data struct, possible refactor point:
    # all_nodes, child_dictionary, parent_dictionary, adj_matrix
    # (the original no-op tuple expression listing them was removed)

    # We don't care about the total number
    _, labelarray = connected_components(adj_matrix)
    counts = collections.Counter()
    for label in labelarray:
        counts[label] += 1

    logging_info("Migration organization: {}".format(",".join(
        map(str, core_organization))))

    for org_id in core_organization:
        # Writing loop over organizations
        org = Organization.objects.get(pk=org_id)
        logging_info("Processing organization {}".format(org))
        assert org, "Organization {} not found".format(org_id)

        org_canonical_buildings = seed.models.CanonicalBuilding.objects.filter(
            canonical_snapshot__super_organization=org_id, active=True).all()
        org_canonical_snapshots = [
            cb.canonical_snapshot for cb in org_canonical_buildings
        ]

        # FIXME: Turns out the ids are on Building
        # Snapshot. Leaving it this way because the other code
        # should be displaying canonical building indexes.
        if starting_from_canonical or starting_on_canonical_following:
            # org_canonical_ids = map(lambda x: x.pk, org_canonical_buildings)
            org_canonical_ids = map(lambda x: x.pk, org_canonical_snapshots)
            try:
                canonical_index = max(starting_from_canonical,
                                      starting_on_canonical_following)
                canonical_index = org_canonical_ids.index(canonical_index)
                if starting_on_canonical_following:
                    canonical_index += 1
                org_canonical_buildings = list(
                    org_canonical_buildings)[canonical_index:]
                org_canonical_snapshots = list(
                    org_canonical_snapshots)[canonical_index:]
                ref_str = "starting" if starting_from_canonical else "following"
                logging_info(
                    "Restricting to canonical starting ndx={} (id={}), was {} now {}."
                    .format(canonical_index, len(org_canonical_ids),
                            len(org_canonical_ids),
                            len(org_canonical_buildings)))
            except ValueError:
                raise RuntimeError(
                    "Requested to start referencing canonical building id={} which was not found."
                    .format(starting_from_canonical))

        # Fix: this whitelist filter block was duplicated verbatim; the
        # second copy only re-filtered an already-filtered list and
        # emitted duplicate log lines, so it was removed.
        if canonical_buildings_whitelist:
            good_canonical_building_indexes = [
                ndx for (ndx, cb) in enumerate(org_canonical_buildings)
                if cb.pk in canonical_buildings_whitelist
            ]
            org_canonical_buildings = [
                org_canonical_buildings[ndx]
                for ndx in good_canonical_building_indexes
            ]
            org_canonical_snapshots = [
                org_canonical_snapshots[ndx]
                for ndx in good_canonical_building_indexes
            ]
            logging_info("Restricted to {} elements in whitelist.".format(
                len(org_canonical_buildings)))
            logging_info("IDS: {}".format(", ".join(
                map(lambda cs: str(cs.pk), org_canonical_buildings))))

        if len(org_canonical_buildings) == 0:
            logging_info("Organization {} has no buildings".format(org_id))
            continue

        last_date = max([cs.modified for cs in org_canonical_snapshots])
        # create_bluesky_cycles_for_org(org, last_date)
        tree_sizes = [
            counts[labelarray[bs.id]] for bs in org_canonical_snapshots
        ]

        ## For each of those trees find the tip
        ## For each of those trees find the import records
        ## For each of those trees find the cycles associated with it
        for ndx, (cb, bs) in enumerate(
                zip(org_canonical_buildings, org_canonical_snapshots)):
            if limit and (ndx + 1) > limit:
                logging_info("Migrated limit={} buildings.".format(limit))
                logging_info(
                    "Skipping remainder of buildings for organization.")
                break
            logging_info("Processing Building {}/{}".format(
                ndx + 1, len(org_canonical_snapshots)))

            bs_label = labelarray[bs.id]
            import_nodes, leaf_nodes, other_nodes = get_node_sinks(
                bs_label, labelarray, parent_dictionary, child_dictionary)

            # Load all those building snapshots
            tree_nodes = itertools.chain(import_nodes, leaf_nodes,
                                         other_nodes)
            building_dict = {
                bs.id: bs
                for bs in seed.models.BuildingSnapshot.objects.filter(
                    pk__in=tree_nodes).all()
            }
            missing_buildings = [
                node for node in itertools.chain(import_nodes, leaf_nodes,
                                                 other_nodes)
                if node not in building_dict
            ]

            import_buildingsnapshots = [
                building_dict[bs_id] for bs_id in import_nodes
            ]
            leaf_buildingsnapshots = [
                building_dict[bs_id] for bs_id in leaf_nodes
            ]
            assert len(leaf_buildingsnapshots), (
                "Expected only one leaf of the canonical building family of "
                "buildings. Found {}").format(len(leaf_buildingsnapshots))
            leaf = leaf_buildingsnapshots[0]
            other_buildingsnapshots = [
                building_dict[bs_id] for bs_id in other_nodes
            ]

            logging_info(
                "Creating Blue Sky Data for for CanonicalBuilding={}".format(
                    cb.pk))
            # embed()
            create_associated_bluesky_taxlots_properties(
                org, import_buildingsnapshots, leaf, other_buildingsnapshots,
                child_dictionary, parent_dictionary, adj_matrix, cb)

    logging_info("END migrate_organization")
    return
def create_associated_bluesky_taxlots_properties(
        org, import_buildingsnapshots, leaf_building, other_buildingsnapshots,
        child_dictionary, parent_dictionary, adj_matrix, cb):
    """Take tree structure describing a single Property/TaxLot over time and
    create the entities.

    Walks the BuildingSnapshot merge tree for one CanonicalBuilding (``cb``)
    in chronological order and materializes the new "Blue Sky" entities:
    TaxLot/Property, their per-cycle TaxLotView/PropertyView +
    TaxLotState/PropertyState, and TaxLotProperty m2m links.

    NOTE(review): ``child_dictionary``, ``parent_dictionary`` and
    ``adj_matrix`` are accepted but never read in this body.
    """
    logging_info(
        "Populating new blue sky entities for canonical snapshot tree!")
    print "Processing {}/{}/{}".format(len(import_buildingsnapshots), 1,
                                       len(other_buildingsnapshots))
    # Counters reported in the summary log line at the end.
    tax_lot_created = 0
    property_created = 0
    tax_lot_view_created = 0
    property_view_created = 0
    tax_lot_state_created = 0
    property_state_created = 0
    m2m_created = 0
    logging_info("Creating Property/TaxLot from {} nodes".format(
        sum(map(len, ([leaf_building], other_buildingsnapshots,
                      import_buildingsnapshots)))))
    tax_lot = None
    property_obj = None
    # Classify the tip of the tree to decide which top-level entities exist.
    leaf_node_type = classify_node(leaf_building, org)
    # if node_has_tax_lot_info(leaf_building, org):
    if leaf_node_type in [TAX_IMPORT, COMBO_IMPORT, MERGE]:
        tax_lot, created = find_or_create_bluesky_taxlot_associated_with_building_snapshot(
            leaf_building, org)
        # tax_lot = seed.models.TaxLot(organization=org)
        tax_lot.save()
        tax_lot_created += int(created)
    # if node_has_property_info(leaf_building, org):
    if leaf_node_type in [PROPERTY_IMPORT, COMBO_IMPORT, MERGE]:
        property_obj, created = find_or_create_bluesky_property_associated_with_building_snapshot(
            leaf_building, org)
        # property_obj = seed.models.Property(organization=org)
        property_obj.save()
        property_created += int(created)
    # Fallback: every canonical building gets at least a Property.
    if not property_obj and not tax_lot:
        property_obj = seed.models.Property(organization=org)
        property_obj.save()
        property_created += 1
    # Most-recent view per cycle; defaultdict(False) lets us truth-test
    # cycles that have not produced a view yet.
    last_taxlot_view = collections.defaultdict(lambda: False)
    last_property_view = collections.defaultdict(lambda: False)
    all_nodes = list(
        itertools.chain(import_buildingsnapshots, other_buildingsnapshots,
                        [leaf_building]))
    all_nodes.sort(key=lambda rec: rec.created)  # Sort from first to last
    x = None  # NOTE(review): debug leftover; only set for 2015 cycles below.
    for ndx, node in enumerate(all_nodes):
        # if node.pk == leaf_building.pk:
        #     pdb.set_trace()
        node_type = classify_node(node, org)
        if node_type == TAX_IMPORT or node_type == COMBO_IMPORT or node_type == MERGE:
            # Get the cycle associated with the node
            import_cycle = load_cycle(org, node)
            if import_cycle.start.year == 2015:
                x = import_cycle
            # print "Node {} cycle is {} with address {}, {}".format(...)
            tax_lot_state = create_tax_lot_state_for_node(node, org, cb)
            tax_lot_state_created += 1
            # Merge into an existing view for this cycle, else create one.
            query = seed.models.TaxLotView.objects.filter(taxlot=tax_lot,
                                                          cycle=import_cycle)
            if query.count():
                taxlotview = query.first()
                taxlotview.update_state(
                    tax_lot_state, name="Merge current state in migration")
                taxlotview.save()
            else:
                taxlotview, created = seed.models.TaxLotView.objects.get_or_create(
                    taxlot=tax_lot, cycle=import_cycle, state=tax_lot_state)
                tax_lot_view_created += int(created)
                assert created, "Should have created a tax lot."
                taxlotview.save()
            last_taxlot_view[taxlotview.cycle] = taxlotview
        if node_type == PROPERTY_IMPORT or node_type == COMBO_IMPORT or node_type == MERGE:
            import_cycle = load_cycle(org, node)
            property_state = create_property_state_for_node(node, org, cb)
            # if import_cycle.start.year == 2015:
            #     print property_state.extra_data['Building Address']
            property_state_created += 1
            query = seed.models.PropertyView.objects.filter(
                property=property_obj, cycle=import_cycle)
            if query.count():
                propertyview = query.first()
                propertyview.update_state(
                    property_state, name="Merge current state in migration")
                propertyview.save()
            else:
                propertyview, created = seed.models.PropertyView.objects.get_or_create(
                    property=property_obj, cycle=import_cycle,
                    state=property_state)
                assert created, "Should have created something"
                property_view_created += int(created)
                propertyview.save()
            last_property_view[propertyview.cycle] = propertyview
        if node_type == MERGE or node_type == COMBO_IMPORT:
            m2m_cycle = load_cycle(org, node)
            if last_property_view[m2m_cycle] and last_taxlot_view[m2m_cycle]:
                if node_type == MERGE:
                    # Check to make sure the last stuff created is
                    # associated with the same cycle as the merge.
                    # (Redundant with the guard above — kept as written.)
                    assert last_property_view[
                        m2m_cycle], "Did not expect NO property view"
                    assert last_taxlot_view[
                        m2m_cycle], "Did not expect NO tax lot view"
                # # FIXME - bad logic
                # if m2m_cycle != last_property_view[cycle].cycle:
                #     # Ultimately Copy the state over to a new state
                #     last_property_view, _ = seed.models.PropertyView.objects.get_or_create(property=property_obj, cycle=m2m_cycle, state=last_property_view.state)
                # if m2m_cycle != last_taxlot_view.cycle:
                #     last_taxlot_view, _ = seed.models.TaxLotView.objects.get_or_create(taxlot=tax_lot, cycle=m2m_cycle, state=last_taxlot_view.state)
                # assert m2m_cycle == last_taxlot_view.cycle == last_property_view.cycle, "Why are not all these equal?!"
                tlp, created = seed.models.TaxLotProperty.objects.get_or_create(
                    property_view=last_property_view[m2m_cycle],
                    taxlot_view=last_taxlot_view[m2m_cycle],
                    cycle=m2m_cycle)
                m2m_created += int(created)
            else:
                # NOTE(review): reconstructed indentation — this fallback
                # appears to fire when a MERGE/COMBO node has no view yet
                # for its cycle; confirm against the original file.
                import_cycle = load_cycle(org, node)
                # Treat it like an import.
                if node_has_tax_lot_info(node, org):
                    tax_lot_state = create_tax_lot_state_for_node(
                        node, org, cb)
                    tax_lot_state_created += 1
                    # Check if there is a TaxLotView Present
                    taxlotview, created = seed.models.TaxLotView.objects.update_or_create(
                        taxlot=tax_lot, cycle=import_cycle,
                        defaults={"state": tax_lot_state})
                    tax_lot_view_created += int(created)
                    taxlotview.save()
                    last_taxlot_view[taxlotview.cycle] = taxlotview
                if node_has_property_info(node, org):
                    property_state = create_property_state_for_node(
                        node, org, cb)
                    property_state_created += 1
                    propertyview, created = seed.models.PropertyView.objects.update_or_create(
                        property=property_obj, cycle=import_cycle,
                        defaults={"state": property_state})
                    property_view_created += int(created)
                    propertyview.save()
                    last_property_view[propertyview.cycle] = propertyview
                if node_has_tax_lot_info(node, org) and node_has_property_info(
                        node, org):
                    _, created = seed.models.TaxLotProperty.objects.get_or_create(
                        property_view=last_property_view[import_cycle],
                        taxlot_view=last_taxlot_view[import_cycle],
                        cycle=import_cycle)
                    m2m_created += int(created)
        # print "{}: {}".format(ndx, last_taxlot_view[x].state.extra_data["Building Address"] if "Building Address" in last_taxlot_view[x].state.extra_data.keys() else last_taxlot_view[x].state.extra_data.keys())
        a = 10  # NOTE(review): dead debug assignment; never read.
    logging_info(
        "{} Tax Lot, {} Property, {} TaxLotView, {} PropertyView, {} TaxLotState, {} PropertyState, {} m2m created."
        .format(tax_lot_created, property_created, tax_lot_view_created,
                property_view_created, tax_lot_state_created,
                property_state_created, m2m_created))
    return
def handle(self, *args, **options):
    """Copy labels from legacy CanonicalBuildings onto the new Blue Sky
    Property/TaxLot objects, or clear them, per organization.

    Options used: ``organization`` (comma-separated ids, else all core
    orgs), ``clear_bluesky_labels``, ``add_property_labels``,
    ``add_taxlot_labels``.
    """
    logging_info("RUN migrate_extradata_columns with args={},kwds={}".format(args, options))
    # Process Arguments
    if options['organization']:
        organization_ids = map(int, options['organization'].split(","))
    else:
        organization_ids = get_core_organizations()
    clear_bluesky_labels = options["clear_bluesky_labels"]
    add_property_labels = options["add_property_labels"]
    add_taxlot_labels = options["add_taxlot_labels"]
    for org_id in organization_ids:
        ##############################
        # Handle the clear case. This is a bit inelegant the
        # way the loop on org_ids is setup.
        if clear_bluesky_labels:
            print "Org={}: Clearing all labels on Property and TaxLot objects.".format(org_id)
            for prop in Property.objects.filter(organization = org_id).all():
                prop.labels.clear()
            for tax_lot in TaxLot.objects.filter(organization = org_id).all():
                tax_lot.labels.clear()
            continue
        # End Clear Case
        ##############################
        print ("Org={}: Migrating Labels with settings add_property_labels={}"
               ", add_taxlot_labels={}").format(org_id, add_property_labels,
                                                add_taxlot_labels)
        ##############################
        # Copy Property
        if add_property_labels:
            # This is inefficient, in that it does each
            # property/tax lot multiple times for each of it's
            # views - but it's clear and shouldn't be
            # prohibitively wasteful.
            # Alternatively you could grab the first
            # propertyview/taxlotview for each property/taxlot
            # and then use the state on that to populate the
            # property/taxlot.
            for pv in PropertyView.objects.filter(property__organization = org_id).select_related('state').all():
                if not "prop_cb_id" in pv.state.extra_data:
                    print "Warning: key 'prop_cb_id' was not found for PropertyView={}".format(pv)
                    continue
                # extra_data['prop_cb_id'] links back to the legacy
                # CanonicalBuilding whose labels we copy over.
                cb_id = pv.state.extra_data['prop_cb_id']
                try:
                    cb = CanonicalBuilding.objects.get(pk=cb_id)
                except ObjectDoesNotExist, xcpt:
                    print "Warning: Canonical Building={} was not found in the DB".format(cb_id)
                    continue
                cb_labels = cb.labels.all()
                preexisting_pv_labels = set(map(lambda l: l.pk, pv.property.labels.all()))
                for label in cb_labels:
                    if label.pk not in preexisting_pv_labels:
                        pv.property.labels.add(label)
                else:
                    # for/else: no break above, so this save always runs
                    # once after the loop.
                    pv.property.save()
            # ##############################
        ##############################
        # Copy Tax Lot labels
        if add_taxlot_labels:
            for tlv in TaxLotView.objects.filter(taxlot__organization = org_id).select_related('state').all():
                # NOTE(review): the message below says 'prop_cb_id' but the
                # key actually checked is 'taxlot_cb_id' — likely copy-paste
                # error in the message text.
                if not "taxlot_cb_id" in tlv.state.extra_data:
                    print "Warning: key 'prop_cb_id' was not found for TaxLotView={}".format(tlv)
                    continue
                cb_id = tlv.state.extra_data['taxlot_cb_id']
                try:
                    cb = CanonicalBuilding.objects.get(pk=cb_id)
                except ObjectDoesNotExist, xcpt:
                    print "Warning: Canonical Building={} was not found in the DB".format(cb_id)
                    continue
                cb_labels = cb.labels.all()
                preexisting_tlv_labels = set(map(lambda l: l.pk, tlv.taxlot.labels.all()))
                for label in cb_labels:
                    if label.pk not in preexisting_tlv_labels:
                        tlv.taxlot.labels.add(label)
                else:
                    # for/else: always saves once after the loop (no break).
                    tlv.taxlot.save()
def handle(self, *args, **options):
    """Migrate the CanonicalBuildings for one or more Organizations into the
    new 'BlueSky' data structures.

    Builds the BuildingSnapshot merge forest from the static tree file,
    labels connected components, then for each org walks each canonical
    building's tree and delegates entity creation to
    ``create_associated_bluesky_taxlots_properties``.
    """
    logging_info("RUN migrate_organization with args={},kwds={}".format(args, options))
    # Process Arguments
    if options['organization']:
        core_organization = map(int, options['organization'].split(","))
    else:
        core_organization = get_core_organizations()
    limit = options['limit'] if "limit" in options else 0
    starting_from_canonical = False if not options['starting_on_canonical'] else options[
        'starting_on_canonical']
    starting_on_canonical_following = False if not options['starting_following_canonical'] else \
        options['starting_following_canonical']
    ADD_METADATA = options['add_metadata']  # NOTE(review): read but never used below.
    assert not (
        starting_on_canonical_following and starting_from_canonical), "Can only specify one of --starting_on_canonical and --starting_following_canonical"
    canonical_buildings_whitelist = map(int, options['cb_whitelist_string'].split(",")) if \
        options['cb_whitelist_string'] else False
    # End Processing
    tree_file = get_static_building_snapshot_tree_file()
    m2m = read_building_snapshot_tree_structure(tree_file)
    # Every snapshot id mentioned on either side of a parent->child edge.
    all_nodes = set(map(projection_onto_index(0), m2m)).union(
        set(map(projection_onto_index(1), m2m)))
    child_dictionary = collections.defaultdict(lambda: set())
    parent_dictionary = collections.defaultdict(lambda: set())
    adj_dim = max(all_nodes) + 1
    # NOTE(review): np.bool is deprecated in modern NumPy; fine on the
    # NumPy version this Python 2 code targets.
    adj_matrix = dok_matrix((adj_dim, adj_dim), dtype=np.bool)
    for (from_node, to_node) in m2m:
        adj_matrix[from_node, to_node] = 1
        child_dictionary[from_node].add(to_node)
        parent_dictionary[to_node].add(from_node)
    # Core data struct, possible refactor point.
    all_nodes, child_dictionary, parent_dictionary, adj_matrix  # no-op expression, kept as written
    # We don't care about the total number because
    # connected_components returns (n_components, labels); only labels used.
    _, labelarray = connected_components(adj_matrix)
    counts = collections.Counter()
    for label in labelarray:
        counts[label] += 1
    logging_info("Migration organization: {}".format(",".join(map(str, core_organization))))
    for org_id in core_organization:
        # Writing loop over organizations
        org = Organization.objects.get(pk=org_id)
        logging_info("Processing organization {}".format(org))
        assert org, "Organization {} not found".format(org_id)
        org_canonical_buildings = seed.models.CanonicalBuilding.objects.filter(
            canonical_snapshot__super_organization=org_id, active=True).all()
        org_canonical_snapshots = [cb.canonical_snapshot for cb in org_canonical_buildings]
        # FIXME: Turns out the ids are on Building
        # Snapshot. Leaving it this way because the other code
        # should be displaying canonical building indexes.
        if starting_from_canonical or starting_on_canonical_following:
            # org_canonical_ids = map(lambda x: x.pk, org_canonical_buildings)
            org_canonical_ids = map(lambda x: x.pk, org_canonical_snapshots)
            try:
                # Exactly one of the two options is set (asserted above);
                # max() picks whichever is non-False.
                canonical_index = max(starting_from_canonical, starting_on_canonical_following)
                canonical_index = org_canonical_ids.index(canonical_index)
                if starting_on_canonical_following:
                    canonical_index += 1
                org_canonical_buildings = list(org_canonical_buildings)[canonical_index:]
                org_canonical_snapshots = list(org_canonical_snapshots)[canonical_index:]
                ref_str = "starting" if starting_from_canonical else "following"
                logging_info(
                    "Restricting to canonical starting ndx={} (id={}), was {} now {}.".format(
                        canonical_index, len(org_canonical_ids), len(org_canonical_ids),
                        len(org_canonical_buildings)))
            except ValueError:
                raise RuntimeError(
                    "Requested to start referencing canonical building id={} which was not found.".format(
                        starting_from_canonical))
        if canonical_buildings_whitelist:
            good_canonical_building_indexes = [ndx for (ndx, cb) in
                                               enumerate(org_canonical_buildings)
                                               if cb.pk in
                                               canonical_buildings_whitelist]
            org_canonical_buildings = [org_canonical_buildings[ndx] for ndx in good_canonical_building_indexes]
            org_canonical_snapshots = [org_canonical_snapshots[ndx] for ndx in good_canonical_building_indexes]
            logging_info(
                "Restricted to {} elements in whitelist.".format(len(org_canonical_buildings)))
            logging_info("IDS: {}".format(
                ", ".join(map(lambda cs: str(cs.pk), org_canonical_buildings))))
        # NOTE(review): this whitelist block is an exact duplicate of the
        # one above; the second pass is a no-op re-filter. Kept as written.
        if canonical_buildings_whitelist:
            good_canonical_building_indexes = [ndx for (ndx, cb) in
                                               enumerate(org_canonical_buildings)
                                               if cb.pk in
                                               canonical_buildings_whitelist]
            org_canonical_buildings = [org_canonical_buildings[ndx] for ndx in good_canonical_building_indexes]
            org_canonical_snapshots = [org_canonical_snapshots[ndx] for ndx in good_canonical_building_indexes]
            logging_info(
                "Restricted to {} elements in whitelist.".format(len(org_canonical_buildings)))
            logging_info("IDS: {}".format(
                ", ".join(map(lambda cs: str(cs.pk), org_canonical_buildings))))
        if len(org_canonical_buildings) == 0:
            logging_info("Organization {} has no buildings".format(org_id))
            continue
        last_date = max([cs.modified for cs in org_canonical_snapshots])
        # create_bluesky_cycles_for_org(org, last_date)
        tree_sizes = [counts[labelarray[bs.id]] for bs in org_canonical_snapshots]
        ## For each of those trees find the tip
        ## For each of those trees find the import records
        ## For each of those trees find the cycles associated with it
        for ndx, (cb, bs) in enumerate(zip(org_canonical_buildings, org_canonical_snapshots)):
            if limit and (ndx + 1) > limit:
                logging_info("Migrated limit={} buildings.".format(limit))
                logging_info("Skipping remainder of buildings for organization.")
                break
            logging_info(
                "Processing Building {}/{}".format(ndx + 1, len(org_canonical_snapshots)))
            bs_label = labelarray[bs.id]
            # Partition this building's component into import roots, leaf
            # tip(s), and interior merge nodes.
            import_nodes, leaf_nodes, other_nodes = get_node_sinks(bs_label, labelarray,
                                                                   parent_dictionary,
                                                                   child_dictionary)
            # Load all those building snapshots
            tree_nodes = itertools.chain(import_nodes, leaf_nodes, other_nodes)
            building_dict = {bs.id: bs for bs in
                             seed.models.BuildingSnapshot.objects.filter(
                                 pk__in=tree_nodes).all()}
            missing_buildings = [node for node in
                                 itertools.chain(import_nodes, leaf_nodes, other_nodes)
                                 if node not in building_dict]
            import_buildingsnapshots = [building_dict[bs_id] for bs_id in import_nodes]
            leaf_buildingsnapshots = [building_dict[bs_id] for bs_id in leaf_nodes]
            assert len(leaf_buildingsnapshots), (
                "Expected only one leaf of the canonical building family of "
                "buildings. Found {}").format(len(leaf_buildingsnapshots))
            leaf = leaf_buildingsnapshots[0]
            other_buildingsnapshots = [building_dict[bs_id] for bs_id in other_nodes]
            logging_info("Creating Blue Sky Data for for CanonicalBuilding={}".format(cb.pk))
            # embed()
            create_associated_bluesky_taxlots_properties(org, import_buildingsnapshots,
                                                         leaf, other_buildingsnapshots,
                                                         child_dictionary,
                                                         parent_dictionary,
                                                         adj_matrix, cb)
    logging_info("END migrate_organization")
    return
############################## # Copy Tax Lot labels if add_taxlot_labels: for tlv in TaxLotView.objects.filter(taxlot__organization = org_id).select_related('state').all(): if not "taxlot_cb_id" in tlv.state.extra_data: print "Warning: key 'prop_cb_id' was not found for TaxLotView={}".format(tlv) continue cb_id = tlv.state.extra_data['taxlot_cb_id'] try: cb = CanonicalBuilding.objects.get(pk=cb_id) except ObjectDoesNotExist, xcpt: print "Warning: Canonical Building={} was not found in the DB".format(cb_id) continue cb_labels = cb.labels.all() preexisting_tlv_labels = set(map(lambda l: l.pk, tlv.taxlot.labels.all())) for label in cb_labels: if label.pk not in preexisting_tlv_labels: tlv.taxlot.labels.add(label) else: tlv.taxlot.save() # ############################## logging_info("END migrate_extradata_columns") return
taxlot__organization=org_id).select_related(
        'state').all():
    # NOTE(review): orphan fragment starting mid-expression; the opening
    # `for tlv in TaxLotView.objects.filter(` lies outside this chunk.
    # NOTE(review): message says 'prop_cb_id' but key checked is
    # 'taxlot_cb_id' — message text looks wrong; confirm before fixing.
    if not "taxlot_cb_id" in tlv.state.extra_data:
        print "Warning: key 'prop_cb_id' was not found for TaxLotView={}".format(
            tlv)
        continue
    cb_id = tlv.state.extra_data['taxlot_cb_id']
    try:
        cb = CanonicalBuilding.objects.get(pk=cb_id)
    except ObjectDoesNotExist, xcpt:
        print "Warning: Canonical Building={} was not found in the DB".format(
            cb_id)
        continue
    cb_labels = cb.labels.all()
    preexisting_tlv_labels = set(
        map(lambda l: l.pk, tlv.taxlot.labels.all()))
    for label in cb_labels:
        if label.pk not in preexisting_tlv_labels:
            tlv.taxlot.labels.add(label)
    else:
        # for/else: always saves once after the loop (no break).
        tlv.taxlot.save()
# ##############################
logging_info("END migrate_extradata_columns")
return
def handle(self, *args, **options):
    """Rebuild the TaxLotProperty m2m links for the DC organization from the
    DC junction CSV (pm_property_id <-> jurisdiction_tax_lot_id pairs).

    Deletes all existing m2m rows for the org first, then re-creates links
    per cycle and prints match statistics.
    """
    logging_info("RUN migrate_dc_m2m with args={},kwds={}".format(
        args, options))
    dc_org = Organization.objects.get(pk=DC_ORG_PK)
    dc_cycles = Cycle.objects.filter(organization=dc_org)
    assert os.path.isfile(
        DC_M2M_FN), "DC Junction File '{}' not found".format(DC_M2M_FN)
    num_m2m = TaxLotProperty.objects.filter(
        property_view__property__organization=dc_org).count()
    logging_info("Deleting {} M2M objects for org {}".format(
        num_m2m, dc_org))
    # Wipe existing links; they are rebuilt from the CSV below.
    TaxLotProperty.objects.filter(
        property_view__property__organization=dc_org).delete()
    reader = csv.reader(open(DC_M2M_FN, 'rU'))
    reader.next()  # Throw away header
    # Columns: (ignored, pm_property_id, jurisdiction_tax_lot_id)
    pmids_taxlotids_m2m = [(y, z) for (x, y, z) in reader]
    all_links = set(pmids_taxlotids_m2m)
    all_links = processLinks(all_links)
    all_properties = set(map(lambda (x, y): x, all_links))
    all_taxlots = set(map(lambda (x, y): y, all_links))
    # print "-" * 30
    # print "Taxlots with len != 8"
    # for x in sorted(filter(lambda x: len(x) != 8, all_taxlots), key=len): print x
    # print "-" * 30
    found_links = set()
    found_taxlots = set()
    found_properties = set()
    logging_info(
        "Processing {} m2m links from {} file across {} cycles.".format(
            len(pmids_taxlotids_m2m), DC_M2M_FN, len(dc_cycles)))
    for (ndx, (pm_id, tl_id)) in enumerate(all_links):
        if ndx % 400 == 1:
            percent_done = 100.0 * ndx / len(pmids_taxlotids_m2m)
            print "{:.2f}% done.".format(percent_done)
        for cycle in dc_cycles:
            pv = PropertyView.objects.filter(state__organization=dc_org,
                                             state__pm_property_id=pm_id,
                                             cycle=cycle)
            tlv = TaxLotView.objects.filter(
                state__organization=dc_org,
                state__jurisdiction_tax_lot_id=tl_id, cycle=cycle)
            if len(pv):
                found_properties.add(pm_id)
            if len(tlv):
                found_taxlots.add(tl_id)
            if len(pv) and len(tlv):
                TaxLotProperty.objects.create(property_view=pv.first(),
                                              taxlot_view=tlv.first(),
                                              cycle=cycle)
                found_links.add((pm_id, tl_id))
            else:
                # pdb.set_trace()
                # NOTE(review): reconstructed indentation — running tally
                # printed on each miss; confirm against original file.
                print "Found {}% of Properties - {} found, {} unfound".format(
                    100.0 * len(found_properties) / len(all_properties),
                    len(found_properties),
                    len(all_properties) - len(found_properties))
    # Final summary after all links/cycles processed.
    print "Found {}% of TaxLots - {} found, {} unfound".format(
        100.0 * len(found_taxlots) / len(all_taxlots),
        len(found_taxlots), len(all_taxlots) - len(found_taxlots))
    print "Found {}% of Links - {} found, {} unfound".format(
        100.0 * len(found_links) / len(all_links), len(found_links),
        len(all_links) - len(found_links))
    print "Unmatched Properties:"
    for p in sorted(all_properties - found_properties):
        print p
    print "\n" * 2
    print "Unmatched TaxLots:"
    for p in sorted(all_taxlots - found_taxlots):
        print p
    logging_info("END migrate_dc_m2m")
    return
def handle(self, *args, **options):
    """Rebuild the TaxLotProperty m2m links for the DC organization from the
    DC junction CSV (duplicate, unformatted copy of the command above).
    """
    logging_info("RUN migrate_dc_m2m with args={},kwds={}".format(args, options))
    dc_org = Organization.objects.get(pk=DC_ORG_PK)
    dc_cycles = Cycle.objects.filter(organization=dc_org)
    assert os.path.isfile(DC_M2M_FN), "DC Junction File '{}' not found".format(DC_M2M_FN)
    num_m2m = TaxLotProperty.objects.filter(property_view__property__organization=dc_org).count()
    logging_info("Deleting {} M2M objects for org {}".format(num_m2m, dc_org))
    # Wipe existing links; they are rebuilt from the CSV below.
    TaxLotProperty.objects.filter(property_view__property__organization=dc_org).delete()
    reader = csv.reader(open(DC_M2M_FN, 'rU'))
    reader.next()  # Throw away header
    # Columns: (ignored, pm_property_id, jurisdiction_tax_lot_id)
    pmids_taxlotids_m2m = [(y,z) for (x,y,z) in reader]
    all_links = set(pmids_taxlotids_m2m)
    all_links = processLinks(all_links)
    all_properties = set(map(lambda (x,y): x, all_links))
    all_taxlots = set(map(lambda (x,y): y, all_links))
    # print "-" * 30
    # print "Taxlots with len != 8"
    # for x in sorted(filter(lambda x: len(x) != 8, all_taxlots), key=len): print x
    # print "-" * 30
    found_links = set()
    found_taxlots = set()
    found_properties = set()
    logging_info("Processing {} m2m links from {} file across {} cycles.".format(
        len(pmids_taxlotids_m2m), DC_M2M_FN, len(dc_cycles)))
    for (ndx, (pm_id, tl_id)) in enumerate(all_links):
        if ndx % 400 == 1:
            percent_done = 100.0 * ndx / len(pmids_taxlotids_m2m)
            print "{:.2f}% done.".format(percent_done)
        for cycle in dc_cycles:
            pv = PropertyView.objects.filter(state__organization=dc_org,
                                             state__pm_property_id=pm_id,
                                             cycle=cycle)
            tlv = TaxLotView.objects.filter(state__organization=dc_org,
                                            state__jurisdiction_tax_lot_id=tl_id,
                                            cycle=cycle)
            if len(pv):
                found_properties.add(pm_id)
            if len(tlv):
                found_taxlots.add(tl_id)
            if len(pv) and len(tlv):
                TaxLotProperty.objects.create(property_view=pv.first(),
                                              taxlot_view=tlv.first(),
                                              cycle=cycle)
                found_links.add((pm_id, tl_id))
            else:
                # pdb.set_trace()
                # NOTE(review): reconstructed indentation — running tally
                # printed on each miss; confirm against original file.
                print "Found {}% of Properties - {} found, {} unfound".format(
                    100.0 * len(found_properties) / len(all_properties),
                    len(found_properties),
                    len(all_properties) - len(found_properties))
    # Final summary after all links/cycles processed.
    print "Found {}% of TaxLots - {} found, {} unfound".format(
        100.0 * len(found_taxlots) / len(all_taxlots),
        len(found_taxlots), len(all_taxlots) - len(found_taxlots))
    print "Found {}% of Links - {} found, {} unfound".format(
        100.0 * len(found_links) / len(all_links), len(found_links),
        len(all_links) - len(found_links))
    print "Unmatched Properties:"
    for p in sorted(all_properties - found_properties):
        print p
    print "\n" * 2
    print "Unmatched TaxLots:"
    for p in sorted(all_taxlots - found_taxlots):
        print p
    logging_info("END migrate_dc_m2m")
    return
def split_taxlots_into_m2m_relationships(self, org_id, org_rules_map):
    """Split TaxLotViews whose jurisdiction_tax_lot_id encodes a LIST of tax
    lot ids into one TaxLot/TaxLotView/TaxLotState per id, re-linking each
    new view to the original PropertyView via TaxLotProperty.

    NOTE(review): ``org_rules_map`` is accepted but only referenced in a
    commented-out line below.
    """
    org = Organization.objects.get(pk=org_id)
    logging_info("Splitting tax lot lists for organization {}/{}".format(
        org_id, org.name))
    # Cache of TaxLots created/found per split id; defaultdict(False) lets
    # us truth-test misses.
    created_tax_lots = collections.defaultdict(lambda: False)
    for m2m in itertools.chain(
            TaxLotProperty.objects.filter(
                property_view__property__organization=org).all(),
            TaxLotProperty.objects.filter(
                taxlot_view__taxlot__organization=org).all()):
        # aggregate_value_from_state(view.state, org_rules_map[org_id])
        # In some cases something in this chain of db calls in m2m.taxlot_view.state.jurisdiction_tax_lot_id
        # something is missing. Log it and continue.
        try:
            jurisdiction_tax_lot_id = m2m.taxlot_view.state.jurisdiction_tax_lot_id
        except Exception as e:
            logging_error(
                "Error splitting taxlotproperty {t} into m2m: {e}".format(
                    t=m2m, e=e))
            continue
        logging_info(
            "Starting to do m2m for jurisdiction_tax_lot_id {id}".format(
                id=jurisdiction_tax_lot_id))
        taxlot_id_list = []
        try:
            taxlot_id_list = get_id_fields(
                m2m.taxlot_view.state.jurisdiction_tax_lot_id)
            _log.info("Found taxlot_id_list: {l}".format(l=taxlot_id_list))
        except TaxLotIDValueError, e:
            logging_warn(e)
            continue
        # Single id: nothing to split.
        if len(taxlot_id_list) <= 1:
            continue
        logging_info(
            "Tax lot view {} w/ tax_lot id {} was split to {} elements: {}"
            .format(m2m.taxlot_view.pk,
                    m2m.taxlot_view.state.jurisdiction_tax_lot_id,
                    len(taxlot_id_list), taxlot_id_list))
        original_taxlot_view = m2m.taxlot_view
        # Some have duplicates
        for tax_lot_id in set(taxlot_id_list):
            logging_info("Break up tax lot {} to {} for cycle {}".format(
                tax_lot_id, taxlot_id_list, m2m.cycle))
            # Take tax lot and create a taxlot, a taxlot view, and a taxlot state.
            # taxlot state, and an m2m for the view and installs each.
            # Check to see if the tax lot exists
            matching_views_qry = TaxLotView.objects.filter(
                taxlot__organization=org,
                state__jurisdiction_tax_lot_id=tax_lot_id)
            matching_views_ct = matching_views_qry.count()
            logging_info(
                "Found {ct} matching views".format(ct=matching_views_ct))
            if matching_views_qry.count():
                # Reuse the TaxLot already carrying this split id.
                tax_lot = matching_views_qry.first().taxlot
                state = matching_views_qry.first().state
                logging_info(
                    "Found matching taxlotviews. First is jurisdiction_tax_lot_id {id}"
                    .format(id=state.jurisdiction_tax_lot_id))
                # FIXME: Yuck! Refactor me please!
                created_tax_lots[tax_lot_id] = tax_lot
                logging_info(
                    "Setting taxlot_state to jurisdiction_tax_lot_id {id}".
                    format(id=original_taxlot_view.state.
                           jurisdiction_tax_lot_id))
                # Apparently this is how Django clones things?
                taxlot_state = original_taxlot_view.state
                taxlot_state.pk = None
                taxlot_state.jurisdiction_tax_lot_id = tax_lot_id
                logging_info(
                    "Setting taxlot_state.jurisdiction_tax_lot_id = {id}".
                    format(id=tax_lot_id))
                taxlot_state.save()
            else:
                logging_info("No match, make a new TaxLot")
                tl = TaxLot(
                    organization=m2m.taxlot_view.taxlot.organization)
                tl.save()
                created_tax_lots[tax_lot_id] = tl
                logging_info(
                    "Setting taxlot_state to jurisdiction_tax_lot_id {id}".
                    format(id=original_taxlot_view.state.
                           jurisdiction_tax_lot_id))
                # Apparently this is how Django clones things?
                taxlot_state = original_taxlot_view.state
                taxlot_state.pk = None
                taxlot_state.jurisdiction_tax_lot_id = tax_lot_id
                logging_info(
                    "Setting taxlot_state.jurisdiction_tax_lot_id = {id}".
                    format(id=tax_lot_id))
                taxlot_state.save()
            # Check and see if the Tax Lot View exists
            qry = TaxLotView.objects.filter(
                taxlot=created_tax_lots[tax_lot_id], cycle=m2m.cycle)
            taxlotview_ct = qry.count()
            logging_info(
                "Found {ct} matching taxlotviews".format(ct=taxlotview_ct))
            if taxlotview_ct:
                taxlotview = qry.first()
                logging_debug("Setting the state of {v} to {s}".format(
                    v=taxlotview.state.jurisdiction_tax_lot_id,
                    s=taxlot_state.jurisdiction_tax_lot_id))
                taxlotview.state = taxlot_state
                taxlotview.save()
            else:
                logging_debug(
                    "Creating a new TaxLotView with cycle {c} and state {s}"
                    .format(c=m2m.cycle.name,
                            s=taxlot_state.jurisdiction_tax_lot_id))
                taxlotview = TaxLotView(
                    taxlot=created_tax_lots[tax_lot_id], cycle=m2m.cycle,
                    state=taxlot_state)  # Clone the state from above
                taxlotview.save()
            logging_debug(
                "TaxLotProperty.objects.get_or_create with pm_id {pm}, jurisdiction_id = {j}, cycle = {c}"
                .format(pm=m2m.property_view.state.pm_property_id,
                        j=taxlotview.state.jurisdiction_tax_lot_id,
                        c=m2m.cycle.name))
            TaxLotProperty.objects.get_or_create(
                property_view=m2m.property_view, taxlot_view=taxlotview,
                cycle=m2m.cycle)
        else:
            # for/else (no break): after every split id is installed,
            # remove the original combined view and its m2m row.
            # The existing TaxLotView and m2m is deleted.
            logging_debug(
                "Deleting existing TaxLotView pm {pm}, jurisdiction {j}".
                format(pm=m2m.property_view.state.pm_property_id,
                       j=m2m.taxlot_view.state.jurisdiction_tax_lot_id))
            tl_view = m2m.taxlot_view
            m2m.delete()
            tl_view.delete()
            pass
        # Go through each view, find all it's tax lot ids and make sure they don't look like lists of many things.
        logging_info("{} => {}".format(jurisdiction_tax_lot_id, taxlot_id_list))
def split_taxlots_into_m2m_relationships(self, org_id, org_rules_map):
    """Split TaxLotViews whose jurisdiction_tax_lot_id encodes a LIST of tax
    lot ids into one TaxLot/TaxLotView/TaxLotState per id (duplicate,
    unformatted copy of the method above).
    """
    org = Organization.objects.get(pk=org_id)
    logging_info("Splitting tax lot lists for organization {}/{}".format(org_id, org.name))
    # Cache of TaxLots created/found per split id; defaultdict(False) lets
    # us truth-test misses.
    created_tax_lots = collections.defaultdict(lambda : False)
    for m2m in itertools.chain(TaxLotProperty.objects.filter(property_view__property__organization=org).all(),
                               TaxLotProperty.objects.filter(taxlot_view__taxlot__organization=org).all()):
        # aggregate_value_from_state(view.state, org_rules_map[org_id])
        # In some cases something in this chain of db calls in m2m.taxlot_view.state.jurisdiction_tax_lot_id
        # something is missing. Log it and continue.
        try:
            jurisdiction_tax_lot_id = m2m.taxlot_view.state.jurisdiction_tax_lot_id
        except Exception as e:
            logging_error("Error splitting taxlotproperty {t} into m2m: {e}".format(t = m2m, e = e))
            continue
        logging_info("Starting to do m2m for jurisdiction_tax_lot_id {id}".format(id = jurisdiction_tax_lot_id))
        taxlot_id_list = []
        try:
            taxlot_id_list = get_id_fields(m2m.taxlot_view.state.jurisdiction_tax_lot_id)
            logger.info("Found taxlot_id_list: {l}".format(l = taxlot_id_list))
        except TaxLotIDValueError, e:
            logging_warn(e)
            continue
        # Single id: nothing to split.
        if len(taxlot_id_list) <= 1:
            continue
        logging_info("Tax lot view {} w/ tax_lot id {} was split to {} elements: {}".format(
            m2m.taxlot_view.pk, m2m.taxlot_view.state.jurisdiction_tax_lot_id,
            len(taxlot_id_list), taxlot_id_list))
        original_taxlot_view = m2m.taxlot_view
        # Some have duplicates
        for tax_lot_id in set(taxlot_id_list):
            logging_info("Break up tax lot {} to {} for cycle {}".format(tax_lot_id, taxlot_id_list, m2m.cycle))
            # Take tax lot and create a taxlot, a taxlot view, and a taxlot state.
            # taxlot state, and an m2m for the view and installs each.
            # Check to see if the tax lot exists
            matching_views_qry = TaxLotView.objects.filter(taxlot__organization=org,
                                                           state__jurisdiction_tax_lot_id=tax_lot_id)
            matching_views_ct = matching_views_qry.count()
            logging_info("Found {ct} matching views".format(ct = matching_views_ct))
            if matching_views_qry.count():
                # Reuse the TaxLot already carrying this split id.
                tax_lot = matching_views_qry.first().taxlot
                state = matching_views_qry.first().state
                logging_info("Found matching taxlotviews. First is jurisdiction_tax_lot_id {id}".format(id = state.jurisdiction_tax_lot_id))
                # FIXME: Yuck! Refactor me please!
                created_tax_lots[tax_lot_id] = tax_lot
                logging_info("Setting taxlot_state to jurisdiction_tax_lot_id {id}".format(id = original_taxlot_view.state.jurisdiction_tax_lot_id))
                # Apparently this is how Django clones things?
                taxlot_state = original_taxlot_view.state
                taxlot_state.pk = None
                taxlot_state.jurisdiction_tax_lot_id = tax_lot_id
                logging_info("Setting taxlot_state.jurisdiction_tax_lot_id = {id}".format(id = tax_lot_id))
                taxlot_state.save()
            else:
                logging_info("No match, make a new TaxLot")
                tl = TaxLot(organization=m2m.taxlot_view.taxlot.organization)
                tl.save()
                created_tax_lots[tax_lot_id] = tl
                logging_info("Setting taxlot_state to jurisdiction_tax_lot_id {id}".format(id = original_taxlot_view.state.jurisdiction_tax_lot_id))
                # Apparently this is how Django clones things?
                taxlot_state = original_taxlot_view.state
                taxlot_state.pk = None
                taxlot_state.jurisdiction_tax_lot_id = tax_lot_id
                logging_info("Setting taxlot_state.jurisdiction_tax_lot_id = {id}".format(id = tax_lot_id))
                taxlot_state.save()
            # Check and see if the Tax Lot View exists
            qry = TaxLotView.objects.filter(taxlot = created_tax_lots[tax_lot_id], cycle = m2m.cycle)
            taxlotview_ct = qry.count()
            logging_info("Found {ct} matching taxlotviews".format(ct = taxlotview_ct))
            if taxlotview_ct:
                taxlotview = qry.first()
                logging_debug("Setting the state of {v} to {s}".format(v = taxlotview.state.jurisdiction_tax_lot_id,
                                                                      s = taxlot_state.jurisdiction_tax_lot_id))
                taxlotview.state = taxlot_state
                taxlotview.save()
            else:
                logging_debug("Creating a new TaxLotView with cycle {c} and state {s}".format(c = m2m.cycle.name,
                                                                                             s = taxlot_state.jurisdiction_tax_lot_id))
                taxlotview = TaxLotView(taxlot = created_tax_lots[tax_lot_id],
                                        cycle = m2m.cycle,
                                        state = taxlot_state)  # Clone the state from above
                taxlotview.save()
            logging_debug("TaxLotProperty.objects.get_or_create with pm_id {pm}, jurisdiction_id = {j}, cycle = {c}".format(
                pm = m2m.property_view.state.pm_property_id,
                j = taxlotview.state.jurisdiction_tax_lot_id,
                c = m2m.cycle.name))
            TaxLotProperty.objects.get_or_create(property_view = m2m.property_view,
                                                 taxlot_view = taxlotview,
                                                 cycle = m2m.cycle)
        else:
            # for/else (no break): after every split id is installed,
            # remove the original combined view and its m2m row.
            # The existing TaxLotView and m2m is deleted.
            logging_debug("Deleting existing TaxLotView pm {pm}, jurisdiction {j}".format(
                pm = m2m.property_view.state.pm_property_id,
                j = m2m.taxlot_view.state.jurisdiction_tax_lot_id))
            tl_view = m2m.taxlot_view
            m2m.delete()
            tl_view.delete()
            pass
        # Go through each view, find all it's tax lot ids and make sure they don't look like lists of many things.
        logging_info("{} => {}".format(jurisdiction_tax_lot_id, taxlot_id_list))
def handle(self, *args, **options):
    """Migrate labels from legacy CanonicalBuilding records onto the new
    Property / TaxLot objects, per organization.

    Options:
        organization: comma-separated org ids; empty means all core orgs.
        clear_bluesky_labels: if set, strip every label from the org's
            Property and TaxLot objects and do nothing else for that org.
        add_property_labels: copy labels to each Property, located through
            its PropertyView state's extra_data key 'prop_cb_id'.
        add_taxlot_labels: copy labels to each TaxLot, located through its
            TaxLotView state's extra_data key 'taxlot_cb_id'.
    """
    # NOTE(review): message copy-pasted from the migrate_extradata_columns
    # command — it does not name this command; kept as-is since the real
    # command name is not visible here. TODO confirm and correct.
    logging_info(
        "RUN migrate_extradata_columns with args={},kwds={}".format(
            args, options))

    # Process arguments.
    if options['organization']:
        organization_ids = map(int, options['organization'].split(","))
    else:
        organization_ids = get_core_organizations()

    clear_bluesky_labels = options["clear_bluesky_labels"]
    add_property_labels = options["add_property_labels"]
    add_taxlot_labels = options["add_taxlot_labels"]

    for org_id in organization_ids:
        ##############################
        # Clear case: wipe labels for this org and skip the migration work.
        if clear_bluesky_labels:
            # Parenthesized single-arg print: valid in both Py2 and Py3.
            print("Org={}: Clearing all labels on Property and TaxLot "
                  "objects.".format(org_id))
            for prop in Property.objects.filter(organization=org_id).all():
                prop.labels.clear()
            for tax_lot in TaxLot.objects.filter(organization=org_id).all():
                tax_lot.labels.clear()
            continue
        # End clear case
        ##############################

        # BUGFIX: .format() now applied inside print's argument; the
        # original called .format on print's (None) result under Py3
        # semantics and only worked because Py2 print is a statement.
        print("Org={}: Migrating Labels with settings add_property_labels={}"
              ", add_taxlot_labels={}".format(org_id, add_property_labels,
                                             add_taxlot_labels))

        ##############################
        # Copy Property labels.
        # This is inefficient in that each property is visited once per
        # view, but it is clear and not prohibitively wasteful.
        if add_property_labels:
            for pv in PropertyView.objects.filter(
                    property__organization=org_id).select_related(
                        'state').all():
                if "prop_cb_id" not in pv.state.extra_data:
                    print("Warning: key 'prop_cb_id' was not found for "
                          "PropertyView={}".format(pv))
                    continue

                cb_id = pv.state.extra_data['prop_cb_id']
                try:
                    cb = CanonicalBuilding.objects.get(pk=cb_id)
                except ObjectDoesNotExist:
                    print("Warning: Canonical Building={} was not found "
                          "in the DB".format(cb_id))
                    continue

                cb_labels = cb.labels.all()
                preexisting_pv_labels = set(
                    label.pk for label in pv.property.labels.all())
                for label in cb_labels:
                    if label.pk not in preexisting_pv_labels:
                        pv.property.labels.add(label)
                # Original used a no-break for/else, so the save always ran
                # after the loop; a plain post-loop save is equivalent.
                pv.property.save()
        ##############################

        ##############################
        # Copy TaxLot labels.
        if add_taxlot_labels:
            for tlv in TaxLotView.objects.filter(
                    taxlot__organization=org_id).select_related(
                        'state').all():
                if "taxlot_cb_id" not in tlv.state.extra_data:
                    # BUGFIX: warning previously named 'prop_cb_id' — a
                    # copy-paste error; the key actually checked here is
                    # 'taxlot_cb_id'.
                    print("Warning: key 'taxlot_cb_id' was not found for "
                          "TaxLotView={}".format(tlv))
                    continue

                cb_id = tlv.state.extra_data['taxlot_cb_id']
                try:
                    cb = CanonicalBuilding.objects.get(pk=cb_id)
                except ObjectDoesNotExist:
                    print("Warning: Canonical Building={} was not found "
                          "in the DB".format(cb_id))
                    continue

                cb_labels = cb.labels.all()
                preexisting_tlv_labels = set(
                    label.pk for label in tlv.taxlot.labels.all())
                for label in cb_labels:
                    if label.pk not in preexisting_tlv_labels:
                        tlv.taxlot.labels.add(label)
                tlv.taxlot.save()
        ##############################
# Go through the tax Lots, collect any that are left, make # sure they aren't a part of any m2m entities. for original_taxlot_view in TaxLotView.objects.filter(taxlot__organization=org).all(): logging_debug("Trying original_taxlot_view jurisdiction {j}".format(j = original_taxlot_view.state.jurisdiction_tax_lot_id)) try: jurisdiction_tax_lot_id = original_taxlot_view.state.jurisdiction_tax_lot_id taxlot_id_list = get_id_fields(jurisdiction_tax_lot_id) logging_debug("Found taxlot_id_list with {ct} items. {l}".format(ct = len(taxlot_id_list), l = taxlot_id_list)) if len(taxlot_id_list) <= 1: continue assert TaxLotProperty.objects.filter(taxlot_view = original_taxlot_view).count() == 0, "Tax Lot should have been broken up already." if TaxLotProperty.objects.filter(taxlot_view = original_taxlot_view).count() != 0: logging_debug("Tax Lot should have been broken up already.") # Some have duplicates for taxlot_id in set(taxlot_id_list): logging_info("Break up tax lot {} to {} for cycle {}".format(jurisdiction_tax_lot_id, taxlot_id_list, m2m.cycle)) # Take tax lot and create a taxlot, a taxlot view, and a taxlot state. # taxlot state, and an m2m for the view and installs each. matching_views_qry = TaxLotView.objects.filter(taxlot__organization=org, state__jurisdiction_tax_lot_id=taxlot_id) matching_views_ct = matching_views_qry.count() logging_debug("Found {ct} matching views".format(ct = matching_views_ct)) if matching_views_ct: taxlot = matching_views_qry.first().taxlot taxlotview_ct = TaxLotView.objects.filter(taxlot = taxlot, cycle = original_taxlot_view.cycle).count() logging_debug("Found {ct} taxlotviews".format(ct = taxlotview_ct)) if taxlotview_ct == 0: taxlot_state = original_taxlot_view.state taxlot_state.pk = None taxlot_state.jurisdiction_tax_lot_id = taxlot_id logging_debug("Creating a copy of the original taxlot_view's state with jurisdiction id {j}".format(j = taxlot_id))