Python update_df Examples

Programming Language: Python

Namespace/Package Name: general_file_functions

Method/Function: update_df

Examples at hotexamples.com: 5

Python update_df - 5 examples found. These are the top rated real world Python examples of general_file_functions.update_df extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: spold1.py Project: ecoinvent/common_objects

 def build_allocation_df(self):
     df = DataFrame()
     exchange_df = self.build_exchange_df()
     for alloc in self.dataset.flowData.iterchildren(tag = tag_prefix + 'allocation'):
         baseline = {0: {'referenceToCoProduct': alloc.get('referenceToCoProduct'), 
                         'CoProductName': exchange_df.loc[int(alloc.get('referenceToCoProduct')), 'name'], 
                         'allocationMethod': int(alloc.get('allocationMethod')), 
                         'fraction': float(alloc.get('fraction'))/100.}}
         for r in alloc.iterchildren(tag = tag_prefix + 'referenceToInputOutput'):
             to_add = deepcopy(baseline)
             sel = exchange_df.loc[int(r.text)]
             to_add[0].update({'referenceToInputOutput': r.text, 
                 'exchange': sel['name']})
             for attr in ['category', 'subCategory', 'group']:
                 to_add[0].update({attr: sel[attr]})
             df = gff.update_df(df, to_add)
     return df

Example #2

Show file

File: spold1.py Project: ecoinvent/common_objects

 def convertion_template(self, MD, folder_template, rosettas, new_MD):
     #mapping[attribute_name_spold2] = attribute_name_spold1
     metainfo_attribute_mapping = {
        'geographyShortname': 'geographyShortname', 
        'generalComment': 'generalComment', 
        'includedActivitiesStart': 'includedProcesses', 
        'geographyComments': 'geographyComment', 
        'technologyComments': 'technologyComment',
        'timePeriodComments': 'timePeriodComment', 
        'samplingProcedure': 'samplingProcedure', 
        'extrapolations': 'extrapolations', 
        'synonyms': 'synonym', 
        'activityName': 'name'
        }
     metainfo_attribute_without_mapping = ['includedActivitiesEnd', 
         'allocationComments', 'technologyLevel', 'macroEconomicComment', 
         'tags']
     line_for_metainfo_template = {}
     for attr in metainfo_attribute_mapping:
         line_for_metainfo_template[attr] = getattr(self, metainfo_attribute_mapping[attr])
     if (line_for_metainfo_template['geographyShortname'] not in 
             set(MD['geographies']['shortname'])):
         line_for_metainfo_template['geographyShortname'] += ' is no longer valid.  Please replace with valid geography shortname from current MasterData.'
     line_for_metainfo_template.update(dict(zip(metainfo_attribute_without_mapping, 
              ['']*len(metainfo_attribute_without_mapping))))
     line_for_metainfo_template['startDate'] = '%s-01-01' % self.startYear
     line_for_metainfo_template['endDate'] = '%s-12-31' % self.endYear
     line_for_metainfo_template['id'] = str(uuid4())
     line_for_metainfo_template['dataEntryBy'] = ''
     line_for_metainfo_template['dataGenerator'] = ''
     if self.name not in MD['ActivityNames'].index:
         if len(new_MD['ActivityNames']) == 0 or self.name not in new_MD['ActivityNames'].index:
             to_add = {len(new_MD['ActivityNames']): {
                 'original activityName': self.name, 
                 'activityName': 'fix me', 
                 'id': str(uuid4())}}
             new_MD['ActivityNames'] = gff.update_df(new_MD['ActivityNames'], to_add)
         line_for_metainfo_template['activityName'] = 'fix me'
     line_for_metainfo_template['activityName source'] = self.name
     for p in self.persons:
         for field1, field2 in [('dataEntryBy', 'dataEntryBy'), 
                 ('dataGenerator', 'dataGeneratorPerson')]:
             if p.number == getattr(self, field2):
                 line_for_metainfo_template[field1] = p.name
                 if p.name not in set(MD['persons']['name']):
                     if len(new_MD['persons']) == 0 or p.name not in set(new_MD['persons']['name']):
                         if len(new_MD['persons']) > 0:
                             1/0
                         to_add = {}
                         for field in ['name', 'address', 'telephone', 'telefax', 
                                 'email', 'companyCode']:
                             to_add[field] = getattr(p, field)
                         if p.companyCode not in MD['companies']['code']:
                             if (len(new_MD['companies']) == 0 or 
                                 p.companyCode not in set(new_MD['companies']['code'])):
                                 to_add2 = {'id': str(uuid4()), 
                                        'name': '', 
                                        'code': p.companyCode, 
                                        'website': '', 
                                        'comment': ''}
                                 new_MD['companies'] = gff.update_df(
                                     new_MD['companies'], {len(
                                     new_MD['companies']): to_add2})
                                 to_add['id'] = to_add2['id']
                             else:
                                 to_add['id'] = new_MD['companies'][new_MD['companies']['code'] == p.companyCode].iloc[0]['id']
                         else:
                             to_add['id'] = MD['companies']['codes'][MD['companies']['codes'] == p.companyCode].iloc[0]['id']
                         new_MD['persons'] = gff.update_df(new_MD['persons'], {len(new_MD['persons']): to_add})
     for s in self.sources:
         fields = gff.list_attributes(s)
         fields.remove('text')
         fields.remove('number')
         if len(new_MD['Sources']) == 0 or s.title not in set(new_MD['Sources']['title']):
             to_add = {}
             for field in fields:
                 to_add[field] = getattr(s, field)
             to_add['comment'] = ''
             to_add['id'] = str(uuid4())
             new_MD['Sources'] = gff.update_df(new_MD['Sources'], 
                 {len(new_MD['Sources']): to_add})
         if s.number == self.referenceToPublishedSource:
             try:
                 uuid = to_add['id']
             except:
                 uuid = new_MD['Sources'][new_MD['Sources']['title'] == s.title].iloc[0]['id']
             line_for_metainfo_template['publishedSourceId'] = uuid
     line_for_metainfo_template['dataPublishedIn'] = self.dataPublishedIn
     line_for_metainfo_template = DataFrame({0: line_for_metainfo_template}).transpose()
     exchanges_template = DataFrame()
     properties_template = DataFrame()
     exchange_attribute_mapping = {'group': 'group', 'name': 'name', 
         'compartment': 'category', 'subcompartment': 'subCategory', 
         'unitName': 'unit', 'amount': 'meanValue', 'comments': 'generalComment', 
         }
     pedigree_fields = ['reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation']
     exchange_attribute_without_mapping = ['activityLink activityName', 
         'activityLink geography', 'variableName', 'mathematicalRelation', 
         'productionVolumeAmount', 'productionVolumeComment', 
         'productionVolumeSourceId', 'productionVolumeMathematicalRelation', 
         'uncertainty comments', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 
         'temporalCorrelation production volume', 'geographicalCorrelation production volume', 
         'furtherTechnologyCorrelation production volume', ]
     for group in group_list:
         for exc in getattr(self, group):
             exchange_attributes = {}
             for attr in exchange_attribute_mapping:
                 exchange_attributes[attr] = getattr(exc, exchange_attribute_mapping[attr])
             if 'Environment' in group:
                 if exchange_attributes['compartment'] == 'air':
                     exchange_attributes['compartment'] = 'Air'
                 elif exchange_attributes['compartment'] == 'water':
                     exchange_attributes['compartment'] = 'Water'
                 elif exchange_attributes['compartment'] == 'soil':
                     exchange_attributes['compartment'] = 'Soil'
                 elif exchange_attributes['compartment'] == 'Non material emissions':
                     pass
                 else:
                     1/0
                 if exchange_attributes['subcompartment'] == 'unspecified':
                     exchange_attributes['subcompartment'] = '(unspecified)'
                 ee = (exchange_attributes['name'], 
                           exchange_attributes['compartment'], 
                             exchange_attributes['subcompartment'])
                 if ee not in set(rosettas['ee'].index):
                     for field in ['name', 'compartment', 'subcompartment']:
                         exchange_attributes['original %s' % field] = deepcopy(exchange_attributes[field])
                         exchange_attributes[field] = 'fix me'
                     if len(new_MD['ElementaryExchanges']) == 0 or gff.merge(ee, ', ') not in set(new_MD['ElementaryExchanges']['index']):
                         to_add = {0: {'nameUUID': str(uuid4()), 
                                       'name source': ee[0], 
                                       'name': '', 
                                       'compartment source': ee[1], 
                                       'compartment': '', 
                                       'subcompartment source': ee[2], 
                                         'subcompartment': '', 
                                         'unitName': rosettas['unit'].loc[exc.unit,'name'],  
                                         'casNumber': '', 
                                         'formula': '', 
                                         'synonyms': '', 
                                         'index': gff.merge(ee, ', ')}}
                         new_MD['ElementaryExchanges'] = gff.update_df(
                             new_MD['ElementaryExchanges'], to_add)
                         exchange_attributes['unitName'] = rosettas['unit'].loc[exc.unit,'name']
                 else:
                     sel = rosettas['ee'].loc[ee]
                     if type(sel) != pandas.core.frame.DataFrame:
                         1/0
                     if len(sel) != 1:
                         1/0
                     sel = sel.iloc[0]
                     for field in ['name', 'compartment', 'subcompartment']:
                         exchange_attributes[field] = sel['%s ecoinvent' % field]
                         exchange_attributes['original %s' % field] = ''
                     if not sel['unit equal']:
                         1/0
             else:#intermediate exchange
                 if exchange_attributes['name'] not in set(rosettas['ie'].index):
                     exchange_attributes['original name'] = deepcopy(exchange_attributes['name'])
                     for field in ['compartment', 'subcompartment']:
                         exchange_attributes[field] = ''
                     exchange_attributes['name'] = 'fix me'
                     if len(new_MD['IntermediateExchanges']) == 0 or exchange_attributes['spold1 name'] not in set(new_MD['IntermediateExchanges']['name spold1']):
                         to_add = {0: {'nameUUID': str(uuid4()), 
                           'name source': exchange_attributes['original name'], 
                           'name': '', 
                           'CAS': '', 
                           'comment': '', 
                           'By-product classification': '', 
                           'CPC': '', 
                           'ISIC rev.4 ecoinvent': '', 
                             'synonyms': '', 
                             'unitName': rosettas['unit'].loc[exc.unit,'name']}}
                         new_MD['IntermediateExchanges'] = gff.update_df(
                             new_MD['IntermediateExchanges'], to_add)
                         exchange_attributes['unitName'] = rosettas['unit'].loc[exc.unit,'name']
                 else:
                     for field in ['name', 'compartment', 'subcompartment']:
                         exchange_attributes['original %s' % field] = ''
                     sel = rosettas['ie'].loc[exchange_attributes['name']]
                     exchange_attributes['name'] = sel['ecoinvent product name']
                     exchange_attributes['potential activityLink activityName'] = sel['ecoinvent activityLink activityName']
                     exchange_attributes['potential activityLink geography'] = sel['ecoinvent activityLink geography']
                     new_unit = MD['IntermediateExchanges'][
                             MD['IntermediateExchanges']['name'] == 
                             exchange_attributes['name']].iloc[0]['unitName']
                     if new_unit != exchange_attributes['unitName']:
                         if new_unit == 'unit' and exchange_attributes['unitName'] == 'p':
                             exchange_attributes['unitName'] = new_unit
                         elif new_unit == 'm*year' and exchange_attributes['unitName'] == 'my':
                             exchange_attributes['unitName'] = new_unit
                         else:
                             1/0
             exchange_attributes['variance'] = ''
             if not gff.isempty(exchange_attributes['comments']):
                 if ',BU:' in exchange_attributes['comments']:
                     pedigreeMatrix, rest = exchange_attributes['comments'].split(',BU:')
                     pedigreeMatrix = pedigreeMatrix[1:].split(',')[:5]
                     exchange_attributes['variance'] = np.log(float(rest.split(');')[0]))**2.
                     if exchange_attributes['variance'] > 5.:
                         1/0
                     exchange_attributes['comments'] = gff.merge(rest.split(');')[1:], ');')
                 else:
                     m = pedigree_re.search(exchange_attributes['comments'])
                     if m:
                         1/0
             else:
                 pedigreeMatrix = 5*['']
             if gff.isempty(exc.uncertaintyType) or exc.group == 'ReferenceProduct':
                 exchange_attributes['uncertainty type'] = ''
             elif exc.uncertaintyType == 0:
                 exchange_attributes['uncertainty type'] = 'undefined'
                 exchange_attributes['variance'] = ''
             elif exc.uncertaintyType == 1:
                 exchange_attributes['uncertainty type'] = 'lognormal'
             elif exc.uncertaintyType == 2:
                 1/0
             elif exc.uncertaintyType == 3:
                 1/0
             elif exc.uncertaintyType == 4:
                 1/0
             else:
                 1/0
                 #add info in log about missing pedigree
             for i in range(len(pedigree_fields)):
                 if pedigreeMatrix[i] == '':
                     exchange_attributes[pedigree_fields[i]] = ''
                 elif pedigreeMatrix[i] in ['1', '2', '3', '4', '5']:
                     exchange_attributes[pedigree_fields[i]] = int(pedigreeMatrix[i])
                 else:
                     1/0
             exchange_attributes.update(dict(zip(
                 exchange_attribute_without_mapping, 
                 ['']*len(exchange_attribute_without_mapping))))
             for field in ['sourceId', 'pageNumbers',]:
                 exchange_attributes[field] = ''
             if not gff.isempty(exc.referenceToSource):
                 1/0
             exchanges_template = gff.update_df(exchanges_template, 
                 {len(exchanges_template): exchange_attributes})
     cols = ['group', 'name', 'compartment', 'subcompartment', 'activityLink activityName', 
         'activityLink geography', 'unitName', 'amount', 'variableName', 
         'mathematicalRelation', 'sourceId', 'pageNumbers', 'comments', 
         'productionVolumeAmount', 'productionVolumeComment', 'productionVolumeSourceId', 
         'productionVolumeMathematicalRelation', 'uncertainty type', 'variance', 
         'uncertainty comments', 'reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 'temporalCorrelation production volume', 
         'geographicalCorrelation production volume', 'furtherTechnologyCorrelation production volume']
     cols.extend(['original name', 'original compartment', 'original subcompartment', 
         'potential activityLink activityName', 'potential activityLink geography'])
     if 'potential activityLink activityName' not in exchanges_template.columns:
         1/0                
     dfs = [{'df': exchanges_template, 'cols': cols, 'sheetname': 'exchanges'}]
     df_new_exchanges = exchanges_template[exchanges_template['name'] == 'fix me']
     if len(df_new_exchanges) > 0:
         for index in df_new_exchanges.index:
             sel = df_new_exchanges.loc[index]
             fields = ['group', 'name', 'compartment', 'subcompartment', 
                   'activityLink activityName', 'activityLink geography']
             baseline = dict(zip(fields, list(sel[fields])))
             fields2 = ['group', 'original name', 'original compartment', 'original subcompartment']
             baseline.update(dict(zip(fields, list(sel[fields2]))))
         cols = ['group', 'name', 'compartment', 'subcompartment', 'activityLink activityName', 
              'activityLink geography', 'property name', 'unitName', 'amount', 
              'comment', 'sourceId', 'mathematicalRelation', 'variableName']
         #dfs.append({'df': '', 'cols': cols, 'sheetname': 'properties'})
     else:
         pass
     logs = DataFrame()
     for index in exchanges_template.index:
         sel = exchanges_template.loc[index]
         fields = ['group', 'name', 'compartment', 'subcompartment', 
                   'activityLink activityName', 'activityLink geography']
         baseline = dict(zip(fields, list(sel[fields])))
         gravity = 'error'
         if sel['name'] == 'fix me':
             fields2 = ['group', 'original name', 'original compartment', 'original subcompartment']
             baseline.update(dict(zip(fields, list(sel[fields2]))))
             if 'Environment' in sel['group']:
                 message = 'Find appropriate exchange in MasterData'
             elif 'FromTechnoslphere' == sel['group']:
                 message = 'Find appropriate exchange in MasterData, and maybe activityLink activityName and geography'
             else:
                 message = 'Find appropriate exchange in MasterData and enter production volume'
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': message}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'If exchange is new, properties have to be entered in the "properties" tab'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         #warnings
         gravity = 'warning'
         if not gff.isempty(sel['variance']) and sel['variance'] == .0006:
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'additional variance by pedigreeMatrix > basic uncertainty.  Variance set to 0.0006. Check pedigree matrix and variance.'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         if gff.isempty(sel['sourceId']):
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering original information'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         if gff.isempty(sel['comments']):
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering comment'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         if sel['group'] == 'ReferenceProduct':
             for field in ['productionVolumeAmount', 'productionVolumeComment', 
                           'productionVolumeSourceId']:
                 if gff.isempty(sel[field]):
                     to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering %s' % field}
                     to_add.update(baseline)
                     logs = gff.update_df(logs, {len(logs): to_add})
             if gff.isempty(sel['uncertainty type production volume']):
                 to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering uncertainty information for production volume'}
                 to_add.update(baseline)
                 logs = gff.update_df(logs, {len(logs): to_add})
             else:
                 for field in ['variance production volume', 
                     'uncertainty comments', 'production volume', 
                     'reliability production volume', 'completeness production volume', 
                     'temporalCorrelation production volume', 'geographicalCorrelation production volume', 
                     'furtherTechnologyCorrelation production volume']:
                     if gff.isempty(sel[field]):
                         1/0
         else:
             if gff.isempty(sel['uncertainty type']):
                 1/0
             else:
                 fields = ['variance']
                 fields.extend(pedigree_fields)
                 for field in fields:
                     if gff.isempty(sel[field]):
                         1/0
     cols = ['gravity', 'line', 'group', 'name', 
             'compartment', 'subcompartment', 'message']
     dfs.append({'df': logs, 'cols': cols, 'sheetname': 'logs'})
     if not gff.isempty(exchanges_template.iloc[0]['original name']):
         filename = exchanges_template.iloc[0]['original name'].split('|')[0]
         filename += '.xlsx'
     else:
         1/0
     print 'saving', filename
     gff.write_DataFrame(dfs, folder_template, filename)
     line_for_metainfo_template['exchange template filename'] = filename
     return (line_for_metainfo_template, exchanges_template, 
             properties_template, new_MD)

Example #3

Show file

File: simapro_dataset_from_csv.py Project: ecoinvent/common_objects

def read_Simapro_export(folder, filename, tabname):
    if '.csv' in filename:
        1/0
    elif '.xlsx':
        wb = load_workbook(filename = os.path.join(folder, filename), use_iterators = True)
        ws = wb.get_sheet_by_name(name = tabname)
        row_offset = -1
        for row in ws.iter_rows(): #skipping header
            row_offset += 1
            if row[0].value == 'Process':
                start = deepcopy(row_offset)
                break
        dim = ws.calculate_dimension()
        dataset_positions = []
        for row in ws.iter_rows(range_string = dim, row_offset = row_offset):
            if row[0].value == 'End':
                dataset_positions.append((start, deepcopy(row_offset)))
                break
            else:
                row_offset += 1
        projectInputParametersPosition = False
        projectCalculatedParameters = False
        for row in ws.iter_rows(range_string = dim, row_offset = row_offset):
            #positioning for next process
            if row[0].value == 'Process':
                start = deepcopy(row_offset)
                break
            elif row[0].value == 'Project Input parameters':
                projectInputParametersPosition = deepcopy(row_offset)
            elif row[0].value == 'Project Calculated parameters':
                projectCalculatedParameters = deepcopy(row_offset)
            else:
                row_offset += 1
        if row_offset == ws.max_row:
            pass
            #last UP
        parameters = DataFrame()
        if projectInputParametersPosition and projectCalculatedParameters:
            for row in ws.iter_rows(range_string = dim, row_offset = projectInputParametersPosition+1):
                if row[0].value == None:
                    break
                else:
                    to_add = {'variableName': str(row[0].value), 
                              'amount': float(row[1].value), 
                                'uncertaintyType': str(row[2].value), 
                                'variance': float(row[3].value), 
                                'comment': str(row[9].value), 
                                'variableNameLength': len(str(row[0].value))}
                    parameters = gff.update_df(parameters, {len(parameters): to_add})
        if len(parameters) > 0:
            parameters.sort(columns = 'variableNameLength', ascending = False, inplace = True)
            for row in ws.iter_rows(range_string = dim, row_offset = projectCalculatedParameters+1):
                if row[0].value == None:
                    break
                elif row[0].value == 'Project Calculated parameters':
                    pass
                else:
                    1/0 #to fix later if it occurs
        if len(parameters) == 0:
            to_add = {'variableName': 'LUC_crop_specific', 
                              'amount': 1., 
                                'uncertaintyType': 'Undefined', 
                                'variance': '', 
                                'comment': 'dummy value, please edit', 
                                'variableNameLength': 10}
            parameters = gff.update_df(parameters, {len(parameters): to_add})
        datasets = []
        for start, end in dataset_positions:
            rows = []
            counter = start
            for row in ws.iter_rows(range_string = dim, row_offset = start):
                counter += 1
                if counter == end:
                    break
                line = []
                for cell in row:
                    line.append(cell.value)
                rows.append(line)
            datasets.append(Dataset(rows, filename, parameters = parameters))
    else:
        1/0
    if len(datasets) == 0:
        1/0
    elif len(datasets) == 1:
        datasets = datasets[0]
    return datasets

Example #4

Show file

File: simapro_dataset_from_csv.py Project: ecoinvent/common_objects

 def convertion_template(self, MD, folder_template, rosettas, new_MD, unit_conflicts):
     metainfo_attribute_mapping = {
        'geographyShortname': 'geography', 
        'generalComment': 'comment', 
        'technologyComments': 'technology',
        'samplingProcedure': 'collectionMethod', 
        }
     metainfo_attribute_without_mapping = ['includedActivitiesEnd', 
                 'allocationComments', 'technologyLevel', 'macroEconomicComment', 
                 'tags', 'includedActivitiesStart', 'geographyComments', 
                 'timePeriodComments', 'extrapolations', 'synonyms']
     line_for_metainfo_template = {}
     for attr in metainfo_attribute_mapping:
         line_for_metainfo_template[attr] = getattr(self, metainfo_attribute_mapping[attr])
     line_for_metainfo_template.update(dict(zip(metainfo_attribute_without_mapping, 
                      ['']*len(metainfo_attribute_without_mapping))))
     for field in ['startDate', 'endDate', 'dataEntryBy', 'dataGenerator']:
         if hasattr(self, field):
             line_for_metainfo_template[field] = getattr(self, field)
         else:
             line_for_metainfo_template[field] = ''
     line_for_metainfo_template['id'] = str(uuid4())
     line_for_metainfo_template['activityName original'] = self.processName
     if self.processName not in set(rosettas['activityName'].index):
         1/0
     self.activityName = rosettas['activityName'].loc[self.processName, 'ecoinvent name']
     if not self.activityName in MD['ActivityNames'].index:
         if len(new_MD['ActivityNames']) == 0 or self.activityName not in set(new_MD['ActivityNames']['activityName']):
             to_add = {len(new_MD['ActivityNames']): {
                 'original activityName': self.processName, 
                 'activityName': self.activityName, 
                 'id': str(uuid4())}}
             new_MD['ActivityNames'] = gff.update_df(new_MD['ActivityNames'], to_add)
     #f.processName = rosettas['ie'].loc[self.ReferenceProduct[0].name, 'ecoinvent activityLink activityName']
     line_for_metainfo_template['activityName'] = self.activityName
     line_for_metainfo_template['accessRestrictedTo'] = 'Licensees'
     line_for_metainfo_template['technologyLevel'] = 'Current'
     line_for_metainfo_template['dataEntryBy'] = line_for_metainfo_template['dataEntryBy'].strip()
     line_for_metainfo_template = DataFrame({0: line_for_metainfo_template}).transpose()
     for field in ['dataEntryBy', 'dataGenerator']:
         personName = line_for_metainfo_template.iloc[0][field]
         if not gff.isempty(personName) and personName not in set(MD['persons']['name']):
             if len(new_MD['persons']) == 0 or personName not in set(new_MD['persons']['name']):
                 to_add = {'id': str(uuid4()), 
                           'name': personName}
                 cols = ['address', 'companyCode', 'companyName', 'email', 'telefax', 'telephone']
                 to_add.update(dict(zip(cols, ['']*len(cols))))
                 new_MD['persons'] = gff.update_df(new_MD['persons'], 
                     {len(new_MD['persons']): to_add})
     exchanges_template = DataFrame()
     properties_template = DataFrame()
     exchange_attribute_mapping = {'group': 'group', 'name': 'name', 
                 'compartment': 'compartment', 'subcompartment': 'subcompartment', 
                 'unitName': 'unitName', 'amount': 'amount', 'comments': 'comment', 
                 'mathematicalRelation': 'mathematicalRelation', 
                 'variableName': 'variableName', 'variance': 'variance', 
                 'sourceId': 'sourceId', 
                 'original name': 'name', 'original compartment': 'compartment', 
                 'original subcompartment': 'subcompartment'
                 }
     pedigree_fields = ['reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation']
     exchange_attribute_without_mapping = ['activityLink activityName', 
         'activityLink geography', 
         'productionVolumeAmount', 'productionVolumeComment', 
         'productionVolumeSourceId', 'productionVolumeMathematicalRelation', 
         'uncertainty comments', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 
         'temporalCorrelation production volume', 'geographicalCorrelation production volume', 
         'furtherTechnologyCorrelation production volume', ]
     group_list = ['ReferenceProduct', 'ByProduct', 'FromTechnosphere', 'ToEnvironment', 
                   'FromEnvironment']
     for group in group_list:
         for exc in getattr(self, group):
             exchange_attributes = {}
             for attr in exchange_attribute_mapping:
                 exchange_attributes[attr] = getattr(exc, exchange_attribute_mapping[attr])
             if 'Environment' in group:
                 if exchange_attributes['compartment'] == 'air':
                     exchange_attributes['compartment'] = 'Air'
                 elif exchange_attributes['compartment'] == 'water':
                     exchange_attributes['compartment'] = 'Water'
                 elif exchange_attributes['compartment'] == 'soil':
                     exchange_attributes['compartment'] = 'Soil'
                 elif exchange_attributes['compartment'] == 'Non material emissions':
                     pass
                 elif exchange_attributes['compartment'] == 'natural resources':
                     exchange_attributes['compartment'] = 'Raw'
                 else:
                     1/0
                 if exchange_attributes['subcompartment'] == 'unspecified':
                     exchange_attributes['subcompartment'] = '(unspecified)'
                 ee = (exchange_attributes['name'], 
                           exchange_attributes['compartment'], 
                             exchange_attributes['subcompartment'])
                 if ee not in set(rosettas['ee'].index):
                     print ee
                     1/0
                 else:
                     sel = rosettas['ee'].loc[ee]
                     if type(sel) == pandas.core.series.Series:
                         sel = DataFrame(sel).transpose()
                     ee_ = tuple(sel.iloc[0][['name ecoinvent', 
                                'compartment ecoinvent', 'subcompartment ecoinvent']])
                     if ee_ in set(MD['ElementaryExchanges'].index):
                         cols = ['name', 'compartment', 'subcompartment', 'unitName']
                         exchange_attributes.update(dict(zip(cols, 
                             list(MD['ElementaryExchanges'].loc[ee_, cols]))))
                     else:
                         if len(new_MD['ElementaryExchanges']) == 0 or gff.merge(ee_, ', ') not in set(new_MD['ElementaryExchanges']['index']):
                             to_add = {0: {'nameUUID': str(uuid4()), 
                                           'name': ee_[0], 
                                           'compartment': ee_[1], 
                                             'subcompartment': ee_[2], 
                                             'unitName': rosettas['unit'].loc[exc.unitName,'name'],  
                                             'casNumber': '', 
                                             'formula': '', 
                                             'synonyms': '', 
                                             'index': gff.merge(ee_, ', '), 
                                             'name original': ee[0], 
                                             'compartment original': ee[1], 
                                             'subcompartment original': ee[2]}}
                             new_MD['ElementaryExchanges'] = gff.update_df(
                                 new_MD['ElementaryExchanges'], to_add)
                         sel2 = new_MD['ElementaryExchanges'][new_MD['ElementaryExchanges']['index'] == 
                             gff.merge(ee_, ', ')].iloc[0]
                         cols = ['name', 'compartment', 'subcompartment', 'unitName']
                         exchange_attributes.update(dict(zip(cols, list(sel2[cols]))))
                     if type(sel) != pandas.core.frame.DataFrame:
                         1/0
                     if len(sel) != 1:
                         1/0
                     sel = sel.iloc[0]
             else:#intermediate exchange
                 if exchange_attributes['name'] not in set(rosettas['ie'].index):
                     print self.filename
                     print exchange_attributes['name']
                     1/0
                 else:
                     sel = rosettas['ie'].loc[exchange_attributes['name']]
                     if sel['ecoinvent product name'] in set(MD['IntermediateExchanges'].index):
                         exchange_attributes['name'] = sel['ecoinvent product name']
                         exchange_attributes['potential activityLink activityName'] = sel['ecoinvent activityLink activityName']
                         exchange_attributes['potential activityLink geography'] = sel['ecoinvent activityLink geography']
                         exchange_attributes['unitName'] = MD['IntermediateExchanges'].loc[exchange_attributes['name'], 'unitName']
                     else:
                         exchange_attributes['name'] = sel['ecoinvent product name']
                         exchange_attributes['potential activityLink activityName'] = sel['ecoinvent activityLink activityName']
                         exchange_attributes['potential activityLink geography'] = sel['ecoinvent activityLink geography']
                         exchange_attributes['unitName'] = rosettas['unit'].loc[exc.unitName,'name']
                         if (len(new_MD['IntermediateExchanges']) == 0 or 
                                 sel['ecoinvent product name'] not in 
                                 set(new_MD['IntermediateExchanges']['name'])):
                             to_add = {0: {'nameUUID': str(uuid4()), 
                               'name original': exchange_attributes['original name'], 
                               'name': sel['ecoinvent product name'], 
                               'CAS': '', 
                               'comment': '', 
                               'By-product classification': '', 
                               'CPC': '', 
                               'ISIC rev.4 ecoinvent': '', 
                                 'synonyms': '', 
                                 'unitName': rosettas['unit'].loc[exc.unitName,'name']}}
                             new_MD['IntermediateExchanges'] = gff.update_df(
                                 new_MD['IntermediateExchanges'], to_add)
                         else:
                             pass #already in new_MD
             if len(exc.pedigreeMatrix) == 5:
                 for i in range(len(pedigree_fields)):
                     exchange_attributes[pedigree_fields[i]] = exc.pedigreeMatrix[i]
             exchange_attributes.update(dict(zip(
                 exchange_attribute_without_mapping, 
                 ['']*len(exchange_attribute_without_mapping))))
             for field in ['pageNumbers']:
                 exchange_attributes[field] = ''
             if exc.unitName != exchange_attributes['unitName']:
                 if exchange_attributes['unitName'] == rosettas['unit'].loc[exc.unitName,'name']:
                     exchange_attributes['amount'] = exchange_attributes['amount'] * rosettas['unit'].loc[exc.unitName,'conversion']
                 elif exc.unitName == 'kg' and exchange_attributes['unitName'] == 'm3' and 'Water' in exc.name:
                     exchange_attributes['amount'] = exchange_attributes['amount']/1000.
                 else:
                     to_add = {'filename': self.filename, 
                               'group': exc.group, 
                               'ecoinvent name': exchange_attributes['name'], 
                             'ecoinvent compartment': exchange_attributes['compartment'], 
                             'ecoinvent subcompartment': exchange_attributes['subcompartment'], 
                             'original unit': exc.unitName, 
                             'ecoinvent unit': exchange_attributes['unitName']}
                     unit_conflicts = gff.update_df(unit_conflicts, to_add)
             for field in ['productionVolumeAmount', 'productionVolumeComment', 
                     'productionVolumeSourceId', 'productionVolumeMathematicalRelation']:
                 if hasattr(exc, field) and not gff.isempty(getattr(exc, field)):
                     exchange_attributes[field] = getattr(exc, field)
             exchanges_template = gff.update_df(exchanges_template, exchange_attributes)
     cols = ['group', 'original name', 'name', 'compartment', 'subcompartment', 'activityLink activityName', 
         'activityLink geography', 'unitName', 'amount', 'variableName', 
         'mathematicalRelation', 'sourceId', 'pageNumbers', 'comments', 
         'productionVolumeAmount', 'productionVolumeComment', 'productionVolumeSourceId', 
         'productionVolumeMathematicalRelation', 'uncertainty type', 'variance', 
         'uncertainty comments', 'reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 'temporalCorrelation production volume', 
         'geographicalCorrelation production volume', 'furtherTechnologyCorrelation production volume', 
         'original compartment', 'original subcompartment', 
         'potential activityLink activityName', 'potential activityLink geography']
     if 'potential activityLink activityName' not in exchanges_template.columns:
         exchanges_template['potential activityLink activityName'] = ''
         exchanges_template['potential activityLink geography'] = ''
     dfs = [{'df': exchanges_template, 'cols': cols, 'sheetname': 'exchanges'}]
     parameters_template = DataFrame()
     for p in self.parameters:
         to_add = {}
         for attr in gff.list_attributes(p):
             to_add[attr] = getattr(p, attr)
         parameters_template = gff.update_df(parameters_template, 
             {len(parameters_template): to_add})
         if p.name not in set(MD['parameters'].index):
             if 'parameters' not in new_MD:
                 new_MD['parameters'] = DataFrame()
             if len(new_MD['parameters']) == 0 or p.name not in set(new_MD['parameters']['name']):
                 to_add = {'name': p.name, 
                           'defaultVariableName': p.variableName, 
                           'unitName': p.unitName, 
                           'comment': '', 
                           'id': str(uuid4())}
                 new_MD['parameters'] = gff.update_df(new_MD['parameters'], to_add)
     cols = ['name', 'variableName', 'amount', 'unitName', 'uncertaintyType', 'comment']
     cols.extend(pedigree_fields)
     for col in cols:
         if col not in parameters_template.columns:
             parameters_template[col] = ''
     dfs.append({'df': parameters_template, 'cols': cols, 'sheetname': 'parameters'})
     if not gff.isempty(exchanges_template.iloc[0]['original name']):
         filename = exchanges_template.iloc[0]['original name'].split('|')[0]
         for c in ['/']:
             filename = filename.replace(c, '_')
         filename += '.xlsx'
     else:
         1/0
     print 'saving', filename
     gff.write_DataFrame(dfs, folder_template, filename)
     line_for_metainfo_template['exchange template filename'] = filename
     sources = set(exchanges_template['sourceId'])
     if 'publishedSourceId' in set(line_for_metainfo_template.columns) and not gff.isempty(line_for_metainfo_template['publishedSourceId']):
         1/0
         sources.add(line_for_metainfo_template['publishedSourceId'])
     if '' in sources:
         sources.remove('')
     
     if len(sources) > 0:
         if 'Sources' not in new_MD:
             new_MD['Sources'] = DataFrame()
         elif 'id' in list(new_MD['Sources'].columns):
             sources = sources.difference(set(new_MD['Sources']['id']))
     if len(sources) > 0:
         cols = ['sourceType', 'title', 'firstAuthor', 'additionalAuthors', 'year', 
                  'pageNumbers', 'nameOfEditors', 'titleOfAnthology', 
                  'placeOfPublications', 'publisher', 'journal', 'volumeNo', 
                  'issueNo', 'generalComment']
         for s in sources:
             to_add = dict(zip(cols, ['']*len(cols)))
             to_add['id'] = s
             new_MD['Sources'] = gff.update_df(new_MD['Sources'], to_add)
     return (line_for_metainfo_template, exchanges_template, 
             properties_template, new_MD, unit_conflicts)

Example #5

Show file

File: simapro_dataset_from_csv.py Project: ecoinvent/common_objects

 def __init__(self, rows, filename, parameters = DataFrame()):
     self.filename = filename
     headers = ['Products', 'Materials/fuels', 
                'Electricity/heat', 'Waste to treatment']
     positions_ie = dict(zip(headers, [0]*len(headers)))
     headers = ['Resources', 'Emissions to air', 'Emissions to water', 
                'Emissions to soil']
     positions_ee = dict(zip(headers, [0]*len(headers)))
     headers = ['Avoided products', 'Final waste flows', 'Non material emissions', 
         'Social issues', 'Economic issues']
     positions_should_be_empty = dict(zip(headers, [0]*len(headers)))
     for i in range(len(rows)):
         if rows[i][0] in headers_metainfo:
             if rows[i+1][0] == None:
                 setattr(self, headers_metainfo[rows[i][0]], '')
             else:
                 setattr(self, headers_metainfo[rows[i][0]], rows[i+1][0])
         elif rows[i][0] in positions_ie:
             positions_ie[rows[i][0]] = i
         elif rows[i][0] in positions_ee:
             positions_ee[rows[i][0]] = i
         elif rows[i][0] in positions_should_be_empty:
             positions_should_be_empty[rows[i][0]] = i
             if rows[i+1][0] != None:
                 1/0
         elif rows[i][0] == 'Input parameters':
             InputParametersPosition = i
         elif rows[i][0] == 'Calculated parameters':
             CalculatedParameters = i
     for row in rows[InputParametersPosition+1:]:
         if row[0] == None:
             break
         else:
             to_add = {'variableName': str(row[0]), 
                       'amount': float(row[1]), 
                         'uncertaintyType': str(row[2]), 
                         'variance': float(row[3]), 
                         'comment': str(row[9]), 
                         'variableNameLength': len(str(row[0]))}
             parameters = gff.update_df(parameters, {len(parameters): to_add})
     if len(parameters) > 0:
         parameters.sort(columns = 'variableNameLength', ascending = False, inplace = True)
     for row in rows[CalculatedParameters+1:]:
         if row[0] == None:
             break
         else:
             1/0 #to fix later if it occurs
     for group in ['ReferenceProduct', 'FromEnvironment', 'ToEnvironment', 
                   'FromTechnosphere', 'ByProduct']:
         setattr(self, group, [])
     for i in range(positions_ie['Products']+1, 
             positions_should_be_empty['Avoided products']-1):
         if rows[i][0] != None:
             ref_exc = Exchange(rows[i], 'ReferenceProduct')
             self.ReferenceProduct.append(ref_exc)
     for i in range(positions_ie['Materials/fuels']+1, 
             positions_ee['Emissions to air']-1):
         if rows[i][0] != None and i != positions_ie['Electricity/heat']:
             exc = Exchange(rows[i], 'FromTechnosphere', parameters = parameters)
             self.FromTechnosphere.append(exc)
     for i in range(positions_ee['Resources']+1, positions_ie['Materials/fuels']-1):
         if not gff.isempty(rows[i][0]):
             exc = Exchange(rows[i], 'FromEnvironment', parameters = parameters)
             self.FromEnvironment.append(exc)
     for i in range(positions_ie['Waste to treatment']+1, 
             InputParametersPosition-1):
         if rows[i][0] != None:
             exc = Exchange(rows[i], 'ByProduct', parameters = parameters)
             self.FromTechnosphere.append(exc)
     for i in range(positions_ee['Emissions to air'], 
                    positions_should_be_empty['Final waste flows']-1):
         if gff.isempty(rows[i][0]):
             pass
         elif rows[i][0] in positions_ee:
             group = rows[i][0]
         else:
             exc = Exchange(rows[i], group, parameters = parameters)
             self.ToEnvironment.append(exc)
     self.parameters = []
     parameters.set_index('variableName', inplace = True)
     for variableName in parameters.index:
         self.parameters.append(Parameter(parameters.loc[variableName]))