# Example #1 (original label: "Beispiel #1")
 def convertion_template(self, MD, folder_template, rosettas, new_MD):
     #mapping[attribute_name_spold2] = attribute_name_spold1
     metainfo_attribute_mapping = {
        'geographyShortname': 'geographyShortname', 
        'generalComment': 'generalComment', 
        'includedActivitiesStart': 'includedProcesses', 
        'geographyComments': 'geographyComment', 
        'technologyComments': 'technologyComment',
        'timePeriodComments': 'timePeriodComment', 
        'samplingProcedure': 'samplingProcedure', 
        'extrapolations': 'extrapolations', 
        'synonyms': 'synonym', 
        'activityName': 'name'
        }
     metainfo_attribute_without_mapping = ['includedActivitiesEnd', 
         'allocationComments', 'technologyLevel', 'macroEconomicComment', 
         'tags']
     line_for_metainfo_template = {}
     for attr in metainfo_attribute_mapping:
         line_for_metainfo_template[attr] = getattr(self, metainfo_attribute_mapping[attr])
     if (line_for_metainfo_template['geographyShortname'] not in 
             set(MD['geographies']['shortname'])):
         line_for_metainfo_template['geographyShortname'] += ' is no longer valid.  Please replace with valid geography shortname from current MasterData.'
     line_for_metainfo_template.update(dict(zip(metainfo_attribute_without_mapping, 
              ['']*len(metainfo_attribute_without_mapping))))
     line_for_metainfo_template['startDate'] = '%s-01-01' % self.startYear
     line_for_metainfo_template['endDate'] = '%s-12-31' % self.endYear
     line_for_metainfo_template['id'] = str(uuid4())
     line_for_metainfo_template['dataEntryBy'] = ''
     line_for_metainfo_template['dataGenerator'] = ''
     if self.name not in MD['ActivityNames'].index:
         if len(new_MD['ActivityNames']) == 0 or self.name not in new_MD['ActivityNames'].index:
             to_add = {len(new_MD['ActivityNames']): {
                 'original activityName': self.name, 
                 'activityName': 'fix me', 
                 'id': str(uuid4())}}
             new_MD['ActivityNames'] = gff.update_df(new_MD['ActivityNames'], to_add)
         line_for_metainfo_template['activityName'] = 'fix me'
     line_for_metainfo_template['activityName source'] = self.name
     for p in self.persons:
         for field1, field2 in [('dataEntryBy', 'dataEntryBy'), 
                 ('dataGenerator', 'dataGeneratorPerson')]:
             if p.number == getattr(self, field2):
                 line_for_metainfo_template[field1] = p.name
                 if p.name not in set(MD['persons']['name']):
                     if len(new_MD['persons']) == 0 or p.name not in set(new_MD['persons']['name']):
                         if len(new_MD['persons']) > 0:
                             1/0
                         to_add = {}
                         for field in ['name', 'address', 'telephone', 'telefax', 
                                 'email', 'companyCode']:
                             to_add[field] = getattr(p, field)
                         if p.companyCode not in MD['companies']['code']:
                             if (len(new_MD['companies']) == 0 or 
                                 p.companyCode not in set(new_MD['companies']['code'])):
                                 to_add2 = {'id': str(uuid4()), 
                                        'name': '', 
                                        'code': p.companyCode, 
                                        'website': '', 
                                        'comment': ''}
                                 new_MD['companies'] = gff.update_df(
                                     new_MD['companies'], {len(
                                     new_MD['companies']): to_add2})
                                 to_add['id'] = to_add2['id']
                             else:
                                 to_add['id'] = new_MD['companies'][new_MD['companies']['code'] == p.companyCode].iloc[0]['id']
                         else:
                             to_add['id'] = MD['companies']['codes'][MD['companies']['codes'] == p.companyCode].iloc[0]['id']
                         new_MD['persons'] = gff.update_df(new_MD['persons'], {len(new_MD['persons']): to_add})
     for s in self.sources:
         fields = gff.list_attributes(s)
         fields.remove('text')
         fields.remove('number')
         if len(new_MD['Sources']) == 0 or s.title not in set(new_MD['Sources']['title']):
             to_add = {}
             for field in fields:
                 to_add[field] = getattr(s, field)
             to_add['comment'] = ''
             to_add['id'] = str(uuid4())
             new_MD['Sources'] = gff.update_df(new_MD['Sources'], 
                 {len(new_MD['Sources']): to_add})
         if s.number == self.referenceToPublishedSource:
             try:
                 uuid = to_add['id']
             except:
                 uuid = new_MD['Sources'][new_MD['Sources']['title'] == s.title].iloc[0]['id']
             line_for_metainfo_template['publishedSourceId'] = uuid
     line_for_metainfo_template['dataPublishedIn'] = self.dataPublishedIn
     line_for_metainfo_template = DataFrame({0: line_for_metainfo_template}).transpose()
     exchanges_template = DataFrame()
     properties_template = DataFrame()
     exchange_attribute_mapping = {'group': 'group', 'name': 'name', 
         'compartment': 'category', 'subcompartment': 'subCategory', 
         'unitName': 'unit', 'amount': 'meanValue', 'comments': 'generalComment', 
         }
     pedigree_fields = ['reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation']
     exchange_attribute_without_mapping = ['activityLink activityName', 
         'activityLink geography', 'variableName', 'mathematicalRelation', 
         'productionVolumeAmount', 'productionVolumeComment', 
         'productionVolumeSourceId', 'productionVolumeMathematicalRelation', 
         'uncertainty comments', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 
         'temporalCorrelation production volume', 'geographicalCorrelation production volume', 
         'furtherTechnologyCorrelation production volume', ]
     for group in group_list:
         for exc in getattr(self, group):
             exchange_attributes = {}
             for attr in exchange_attribute_mapping:
                 exchange_attributes[attr] = getattr(exc, exchange_attribute_mapping[attr])
             if 'Environment' in group:
                 if exchange_attributes['compartment'] == 'air':
                     exchange_attributes['compartment'] = 'Air'
                 elif exchange_attributes['compartment'] == 'water':
                     exchange_attributes['compartment'] = 'Water'
                 elif exchange_attributes['compartment'] == 'soil':
                     exchange_attributes['compartment'] = 'Soil'
                 elif exchange_attributes['compartment'] == 'Non material emissions':
                     pass
                 else:
                     1/0
                 if exchange_attributes['subcompartment'] == 'unspecified':
                     exchange_attributes['subcompartment'] = '(unspecified)'
                 ee = (exchange_attributes['name'], 
                           exchange_attributes['compartment'], 
                             exchange_attributes['subcompartment'])
                 if ee not in set(rosettas['ee'].index):
                     for field in ['name', 'compartment', 'subcompartment']:
                         exchange_attributes['original %s' % field] = deepcopy(exchange_attributes[field])
                         exchange_attributes[field] = 'fix me'
                     if len(new_MD['ElementaryExchanges']) == 0 or gff.merge(ee, ', ') not in set(new_MD['ElementaryExchanges']['index']):
                         to_add = {0: {'nameUUID': str(uuid4()), 
                                       'name source': ee[0], 
                                       'name': '', 
                                       'compartment source': ee[1], 
                                       'compartment': '', 
                                       'subcompartment source': ee[2], 
                                         'subcompartment': '', 
                                         'unitName': rosettas['unit'].loc[exc.unit,'name'],  
                                         'casNumber': '', 
                                         'formula': '', 
                                         'synonyms': '', 
                                         'index': gff.merge(ee, ', ')}}
                         new_MD['ElementaryExchanges'] = gff.update_df(
                             new_MD['ElementaryExchanges'], to_add)
                         exchange_attributes['unitName'] = rosettas['unit'].loc[exc.unit,'name']
                 else:
                     sel = rosettas['ee'].loc[ee]
                     if type(sel) != pandas.core.frame.DataFrame:
                         1/0
                     if len(sel) != 1:
                         1/0
                     sel = sel.iloc[0]
                     for field in ['name', 'compartment', 'subcompartment']:
                         exchange_attributes[field] = sel['%s ecoinvent' % field]
                         exchange_attributes['original %s' % field] = ''
                     if not sel['unit equal']:
                         1/0
             else:#intermediate exchange
                 if exchange_attributes['name'] not in set(rosettas['ie'].index):
                     exchange_attributes['original name'] = deepcopy(exchange_attributes['name'])
                     for field in ['compartment', 'subcompartment']:
                         exchange_attributes[field] = ''
                     exchange_attributes['name'] = 'fix me'
                     if len(new_MD['IntermediateExchanges']) == 0 or exchange_attributes['spold1 name'] not in set(new_MD['IntermediateExchanges']['name spold1']):
                         to_add = {0: {'nameUUID': str(uuid4()), 
                           'name source': exchange_attributes['original name'], 
                           'name': '', 
                           'CAS': '', 
                           'comment': '', 
                           'By-product classification': '', 
                           'CPC': '', 
                           'ISIC rev.4 ecoinvent': '', 
                             'synonyms': '', 
                             'unitName': rosettas['unit'].loc[exc.unit,'name']}}
                         new_MD['IntermediateExchanges'] = gff.update_df(
                             new_MD['IntermediateExchanges'], to_add)
                         exchange_attributes['unitName'] = rosettas['unit'].loc[exc.unit,'name']
                 else:
                     for field in ['name', 'compartment', 'subcompartment']:
                         exchange_attributes['original %s' % field] = ''
                     sel = rosettas['ie'].loc[exchange_attributes['name']]
                     exchange_attributes['name'] = sel['ecoinvent product name']
                     exchange_attributes['potential activityLink activityName'] = sel['ecoinvent activityLink activityName']
                     exchange_attributes['potential activityLink geography'] = sel['ecoinvent activityLink geography']
                     new_unit = MD['IntermediateExchanges'][
                             MD['IntermediateExchanges']['name'] == 
                             exchange_attributes['name']].iloc[0]['unitName']
                     if new_unit != exchange_attributes['unitName']:
                         if new_unit == 'unit' and exchange_attributes['unitName'] == 'p':
                             exchange_attributes['unitName'] = new_unit
                         elif new_unit == 'm*year' and exchange_attributes['unitName'] == 'my':
                             exchange_attributes['unitName'] = new_unit
                         else:
                             1/0
             exchange_attributes['variance'] = ''
             if not gff.isempty(exchange_attributes['comments']):
                 if ',BU:' in exchange_attributes['comments']:
                     pedigreeMatrix, rest = exchange_attributes['comments'].split(',BU:')
                     pedigreeMatrix = pedigreeMatrix[1:].split(',')[:5]
                     exchange_attributes['variance'] = np.log(float(rest.split(');')[0]))**2.
                     if exchange_attributes['variance'] > 5.:
                         1/0
                     exchange_attributes['comments'] = gff.merge(rest.split(');')[1:], ');')
                 else:
                     m = pedigree_re.search(exchange_attributes['comments'])
                     if m:
                         1/0
             else:
                 pedigreeMatrix = 5*['']
             if gff.isempty(exc.uncertaintyType) or exc.group == 'ReferenceProduct':
                 exchange_attributes['uncertainty type'] = ''
             elif exc.uncertaintyType == 0:
                 exchange_attributes['uncertainty type'] = 'undefined'
                 exchange_attributes['variance'] = ''
             elif exc.uncertaintyType == 1:
                 exchange_attributes['uncertainty type'] = 'lognormal'
             elif exc.uncertaintyType == 2:
                 1/0
             elif exc.uncertaintyType == 3:
                 1/0
             elif exc.uncertaintyType == 4:
                 1/0
             else:
                 1/0
                 #add info in log about missing pedigree
             for i in range(len(pedigree_fields)):
                 if pedigreeMatrix[i] == '':
                     exchange_attributes[pedigree_fields[i]] = ''
                 elif pedigreeMatrix[i] in ['1', '2', '3', '4', '5']:
                     exchange_attributes[pedigree_fields[i]] = int(pedigreeMatrix[i])
                 else:
                     1/0
             exchange_attributes.update(dict(zip(
                 exchange_attribute_without_mapping, 
                 ['']*len(exchange_attribute_without_mapping))))
             for field in ['sourceId', 'pageNumbers',]:
                 exchange_attributes[field] = ''
             if not gff.isempty(exc.referenceToSource):
                 1/0
             exchanges_template = gff.update_df(exchanges_template, 
                 {len(exchanges_template): exchange_attributes})
     cols = ['group', 'name', 'compartment', 'subcompartment', 'activityLink activityName', 
         'activityLink geography', 'unitName', 'amount', 'variableName', 
         'mathematicalRelation', 'sourceId', 'pageNumbers', 'comments', 
         'productionVolumeAmount', 'productionVolumeComment', 'productionVolumeSourceId', 
         'productionVolumeMathematicalRelation', 'uncertainty type', 'variance', 
         'uncertainty comments', 'reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 'temporalCorrelation production volume', 
         'geographicalCorrelation production volume', 'furtherTechnologyCorrelation production volume']
     cols.extend(['original name', 'original compartment', 'original subcompartment', 
         'potential activityLink activityName', 'potential activityLink geography'])
     if 'potential activityLink activityName' not in exchanges_template.columns:
         1/0                
     dfs = [{'df': exchanges_template, 'cols': cols, 'sheetname': 'exchanges'}]
     df_new_exchanges = exchanges_template[exchanges_template['name'] == 'fix me']
     if len(df_new_exchanges) > 0:
         for index in df_new_exchanges.index:
             sel = df_new_exchanges.loc[index]
             fields = ['group', 'name', 'compartment', 'subcompartment', 
                   'activityLink activityName', 'activityLink geography']
             baseline = dict(zip(fields, list(sel[fields])))
             fields2 = ['group', 'original name', 'original compartment', 'original subcompartment']
             baseline.update(dict(zip(fields, list(sel[fields2]))))
         cols = ['group', 'name', 'compartment', 'subcompartment', 'activityLink activityName', 
              'activityLink geography', 'property name', 'unitName', 'amount', 
              'comment', 'sourceId', 'mathematicalRelation', 'variableName']
         #dfs.append({'df': '', 'cols': cols, 'sheetname': 'properties'})
     else:
         pass
     logs = DataFrame()
     for index in exchanges_template.index:
         sel = exchanges_template.loc[index]
         fields = ['group', 'name', 'compartment', 'subcompartment', 
                   'activityLink activityName', 'activityLink geography']
         baseline = dict(zip(fields, list(sel[fields])))
         gravity = 'error'
         if sel['name'] == 'fix me':
             fields2 = ['group', 'original name', 'original compartment', 'original subcompartment']
             baseline.update(dict(zip(fields, list(sel[fields2]))))
             if 'Environment' in sel['group']:
                 message = 'Find appropriate exchange in MasterData'
             elif 'FromTechnoslphere' == sel['group']:
                 message = 'Find appropriate exchange in MasterData, and maybe activityLink activityName and geography'
             else:
                 message = 'Find appropriate exchange in MasterData and enter production volume'
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': message}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'If exchange is new, properties have to be entered in the "properties" tab'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         #warnings
         gravity = 'warning'
         if not gff.isempty(sel['variance']) and sel['variance'] == .0006:
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'additional variance by pedigreeMatrix > basic uncertainty.  Variance set to 0.0006. Check pedigree matrix and variance.'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         if gff.isempty(sel['sourceId']):
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering original information'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         if gff.isempty(sel['comments']):
             to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering comment'}
             to_add.update(baseline)
             logs = gff.update_df(logs, {len(logs): to_add})
         if sel['group'] == 'ReferenceProduct':
             for field in ['productionVolumeAmount', 'productionVolumeComment', 
                           'productionVolumeSourceId']:
                 if gff.isempty(sel[field]):
                     to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering %s' % field}
                     to_add.update(baseline)
                     logs = gff.update_df(logs, {len(logs): to_add})
             if gff.isempty(sel['uncertainty type production volume']):
                 to_add = {'gravity': gravity, 
                       'line': index + 2, 
                       'message': 'consider entering uncertainty information for production volume'}
                 to_add.update(baseline)
                 logs = gff.update_df(logs, {len(logs): to_add})
             else:
                 for field in ['variance production volume', 
                     'uncertainty comments', 'production volume', 
                     'reliability production volume', 'completeness production volume', 
                     'temporalCorrelation production volume', 'geographicalCorrelation production volume', 
                     'furtherTechnologyCorrelation production volume']:
                     if gff.isempty(sel[field]):
                         1/0
         else:
             if gff.isempty(sel['uncertainty type']):
                 1/0
             else:
                 fields = ['variance']
                 fields.extend(pedigree_fields)
                 for field in fields:
                     if gff.isempty(sel[field]):
                         1/0
     cols = ['gravity', 'line', 'group', 'name', 
             'compartment', 'subcompartment', 'message']
     dfs.append({'df': logs, 'cols': cols, 'sheetname': 'logs'})
     if not gff.isempty(exchanges_template.iloc[0]['original name']):
         filename = exchanges_template.iloc[0]['original name'].split('|')[0]
         filename += '.xlsx'
     else:
         1/0
     print 'saving', filename
     gff.write_DataFrame(dfs, folder_template, filename)
     line_for_metainfo_template['exchange template filename'] = filename
     return (line_for_metainfo_template, exchanges_template, 
             properties_template, new_MD)
 def convertion_template(self, MD, folder_template, rosettas, new_MD, unit_conflicts):
     metainfo_attribute_mapping = {
        'geographyShortname': 'geography', 
        'generalComment': 'comment', 
        'technologyComments': 'technology',
        'samplingProcedure': 'collectionMethod', 
        }
     metainfo_attribute_without_mapping = ['includedActivitiesEnd', 
                 'allocationComments', 'technologyLevel', 'macroEconomicComment', 
                 'tags', 'includedActivitiesStart', 'geographyComments', 
                 'timePeriodComments', 'extrapolations', 'synonyms']
     line_for_metainfo_template = {}
     for attr in metainfo_attribute_mapping:
         line_for_metainfo_template[attr] = getattr(self, metainfo_attribute_mapping[attr])
     line_for_metainfo_template.update(dict(zip(metainfo_attribute_without_mapping, 
                      ['']*len(metainfo_attribute_without_mapping))))
     for field in ['startDate', 'endDate', 'dataEntryBy', 'dataGenerator']:
         if hasattr(self, field):
             line_for_metainfo_template[field] = getattr(self, field)
         else:
             line_for_metainfo_template[field] = ''
     line_for_metainfo_template['id'] = str(uuid4())
     line_for_metainfo_template['activityName original'] = self.processName
     if self.processName not in set(rosettas['activityName'].index):
         1/0
     self.activityName = rosettas['activityName'].loc[self.processName, 'ecoinvent name']
     if not self.activityName in MD['ActivityNames'].index:
         if len(new_MD['ActivityNames']) == 0 or self.activityName not in set(new_MD['ActivityNames']['activityName']):
             to_add = {len(new_MD['ActivityNames']): {
                 'original activityName': self.processName, 
                 'activityName': self.activityName, 
                 'id': str(uuid4())}}
             new_MD['ActivityNames'] = gff.update_df(new_MD['ActivityNames'], to_add)
     #f.processName = rosettas['ie'].loc[self.ReferenceProduct[0].name, 'ecoinvent activityLink activityName']
     line_for_metainfo_template['activityName'] = self.activityName
     line_for_metainfo_template['accessRestrictedTo'] = 'Licensees'
     line_for_metainfo_template['technologyLevel'] = 'Current'
     line_for_metainfo_template['dataEntryBy'] = line_for_metainfo_template['dataEntryBy'].strip()
     line_for_metainfo_template = DataFrame({0: line_for_metainfo_template}).transpose()
     for field in ['dataEntryBy', 'dataGenerator']:
         personName = line_for_metainfo_template.iloc[0][field]
         if not gff.isempty(personName) and personName not in set(MD['persons']['name']):
             if len(new_MD['persons']) == 0 or personName not in set(new_MD['persons']['name']):
                 to_add = {'id': str(uuid4()), 
                           'name': personName}
                 cols = ['address', 'companyCode', 'companyName', 'email', 'telefax', 'telephone']
                 to_add.update(dict(zip(cols, ['']*len(cols))))
                 new_MD['persons'] = gff.update_df(new_MD['persons'], 
                     {len(new_MD['persons']): to_add})
     exchanges_template = DataFrame()
     properties_template = DataFrame()
     exchange_attribute_mapping = {'group': 'group', 'name': 'name', 
                 'compartment': 'compartment', 'subcompartment': 'subcompartment', 
                 'unitName': 'unitName', 'amount': 'amount', 'comments': 'comment', 
                 'mathematicalRelation': 'mathematicalRelation', 
                 'variableName': 'variableName', 'variance': 'variance', 
                 'sourceId': 'sourceId', 
                 'original name': 'name', 'original compartment': 'compartment', 
                 'original subcompartment': 'subcompartment'
                 }
     pedigree_fields = ['reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation']
     exchange_attribute_without_mapping = ['activityLink activityName', 
         'activityLink geography', 
         'productionVolumeAmount', 'productionVolumeComment', 
         'productionVolumeSourceId', 'productionVolumeMathematicalRelation', 
         'uncertainty comments', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 
         'temporalCorrelation production volume', 'geographicalCorrelation production volume', 
         'furtherTechnologyCorrelation production volume', ]
     group_list = ['ReferenceProduct', 'ByProduct', 'FromTechnosphere', 'ToEnvironment', 
                   'FromEnvironment']
     for group in group_list:
         for exc in getattr(self, group):
             exchange_attributes = {}
             for attr in exchange_attribute_mapping:
                 exchange_attributes[attr] = getattr(exc, exchange_attribute_mapping[attr])
             if 'Environment' in group:
                 if exchange_attributes['compartment'] == 'air':
                     exchange_attributes['compartment'] = 'Air'
                 elif exchange_attributes['compartment'] == 'water':
                     exchange_attributes['compartment'] = 'Water'
                 elif exchange_attributes['compartment'] == 'soil':
                     exchange_attributes['compartment'] = 'Soil'
                 elif exchange_attributes['compartment'] == 'Non material emissions':
                     pass
                 elif exchange_attributes['compartment'] == 'natural resources':
                     exchange_attributes['compartment'] = 'Raw'
                 else:
                     1/0
                 if exchange_attributes['subcompartment'] == 'unspecified':
                     exchange_attributes['subcompartment'] = '(unspecified)'
                 ee = (exchange_attributes['name'], 
                           exchange_attributes['compartment'], 
                             exchange_attributes['subcompartment'])
                 if ee not in set(rosettas['ee'].index):
                     print ee
                     1/0
                 else:
                     sel = rosettas['ee'].loc[ee]
                     if type(sel) == pandas.core.series.Series:
                         sel = DataFrame(sel).transpose()
                     ee_ = tuple(sel.iloc[0][['name ecoinvent', 
                                'compartment ecoinvent', 'subcompartment ecoinvent']])
                     if ee_ in set(MD['ElementaryExchanges'].index):
                         cols = ['name', 'compartment', 'subcompartment', 'unitName']
                         exchange_attributes.update(dict(zip(cols, 
                             list(MD['ElementaryExchanges'].loc[ee_, cols]))))
                     else:
                         if len(new_MD['ElementaryExchanges']) == 0 or gff.merge(ee_, ', ') not in set(new_MD['ElementaryExchanges']['index']):
                             to_add = {0: {'nameUUID': str(uuid4()), 
                                           'name': ee_[0], 
                                           'compartment': ee_[1], 
                                             'subcompartment': ee_[2], 
                                             'unitName': rosettas['unit'].loc[exc.unitName,'name'],  
                                             'casNumber': '', 
                                             'formula': '', 
                                             'synonyms': '', 
                                             'index': gff.merge(ee_, ', '), 
                                             'name original': ee[0], 
                                             'compartment original': ee[1], 
                                             'subcompartment original': ee[2]}}
                             new_MD['ElementaryExchanges'] = gff.update_df(
                                 new_MD['ElementaryExchanges'], to_add)
                         sel2 = new_MD['ElementaryExchanges'][new_MD['ElementaryExchanges']['index'] == 
                             gff.merge(ee_, ', ')].iloc[0]
                         cols = ['name', 'compartment', 'subcompartment', 'unitName']
                         exchange_attributes.update(dict(zip(cols, list(sel2[cols]))))
                     if type(sel) != pandas.core.frame.DataFrame:
                         1/0
                     if len(sel) != 1:
                         1/0
                     sel = sel.iloc[0]
             else:#intermediate exchange
                 if exchange_attributes['name'] not in set(rosettas['ie'].index):
                     print self.filename
                     print exchange_attributes['name']
                     1/0
                 else:
                     sel = rosettas['ie'].loc[exchange_attributes['name']]
                     if sel['ecoinvent product name'] in set(MD['IntermediateExchanges'].index):
                         exchange_attributes['name'] = sel['ecoinvent product name']
                         exchange_attributes['potential activityLink activityName'] = sel['ecoinvent activityLink activityName']
                         exchange_attributes['potential activityLink geography'] = sel['ecoinvent activityLink geography']
                         exchange_attributes['unitName'] = MD['IntermediateExchanges'].loc[exchange_attributes['name'], 'unitName']
                     else:
                         exchange_attributes['name'] = sel['ecoinvent product name']
                         exchange_attributes['potential activityLink activityName'] = sel['ecoinvent activityLink activityName']
                         exchange_attributes['potential activityLink geography'] = sel['ecoinvent activityLink geography']
                         exchange_attributes['unitName'] = rosettas['unit'].loc[exc.unitName,'name']
                         if (len(new_MD['IntermediateExchanges']) == 0 or 
                                 sel['ecoinvent product name'] not in 
                                 set(new_MD['IntermediateExchanges']['name'])):
                             to_add = {0: {'nameUUID': str(uuid4()), 
                               'name original': exchange_attributes['original name'], 
                               'name': sel['ecoinvent product name'], 
                               'CAS': '', 
                               'comment': '', 
                               'By-product classification': '', 
                               'CPC': '', 
                               'ISIC rev.4 ecoinvent': '', 
                                 'synonyms': '', 
                                 'unitName': rosettas['unit'].loc[exc.unitName,'name']}}
                             new_MD['IntermediateExchanges'] = gff.update_df(
                                 new_MD['IntermediateExchanges'], to_add)
                         else:
                             pass #already in new_MD
             if len(exc.pedigreeMatrix) == 5:
                 for i in range(len(pedigree_fields)):
                     exchange_attributes[pedigree_fields[i]] = exc.pedigreeMatrix[i]
             exchange_attributes.update(dict(zip(
                 exchange_attribute_without_mapping, 
                 ['']*len(exchange_attribute_without_mapping))))
             for field in ['pageNumbers']:
                 exchange_attributes[field] = ''
             if exc.unitName != exchange_attributes['unitName']:
                 if exchange_attributes['unitName'] == rosettas['unit'].loc[exc.unitName,'name']:
                     exchange_attributes['amount'] = exchange_attributes['amount'] * rosettas['unit'].loc[exc.unitName,'conversion']
                 elif exc.unitName == 'kg' and exchange_attributes['unitName'] == 'm3' and 'Water' in exc.name:
                     exchange_attributes['amount'] = exchange_attributes['amount']/1000.
                 else:
                     to_add = {'filename': self.filename, 
                               'group': exc.group, 
                               'ecoinvent name': exchange_attributes['name'], 
                             'ecoinvent compartment': exchange_attributes['compartment'], 
                             'ecoinvent subcompartment': exchange_attributes['subcompartment'], 
                             'original unit': exc.unitName, 
                             'ecoinvent unit': exchange_attributes['unitName']}
                     unit_conflicts = gff.update_df(unit_conflicts, to_add)
             for field in ['productionVolumeAmount', 'productionVolumeComment', 
                     'productionVolumeSourceId', 'productionVolumeMathematicalRelation']:
                 if hasattr(exc, field) and not gff.isempty(getattr(exc, field)):
                     exchange_attributes[field] = getattr(exc, field)
             exchanges_template = gff.update_df(exchanges_template, exchange_attributes)
     cols = ['group', 'original name', 'name', 'compartment', 'subcompartment', 'activityLink activityName', 
         'activityLink geography', 'unitName', 'amount', 'variableName', 
         'mathematicalRelation', 'sourceId', 'pageNumbers', 'comments', 
         'productionVolumeAmount', 'productionVolumeComment', 'productionVolumeSourceId', 
         'productionVolumeMathematicalRelation', 'uncertainty type', 'variance', 
         'uncertainty comments', 'reliability', 'completeness', 'temporalCorrelation', 
         'geographicalCorrelation', 'furtherTechnologyCorrelation', 'uncertainty type production volume', 
         'variance production volume', 'uncertainty comments production volume', 
         'reliability production volume', 'completeness production volume', 'temporalCorrelation production volume', 
         'geographicalCorrelation production volume', 'furtherTechnologyCorrelation production volume', 
         'original compartment', 'original subcompartment', 
         'potential activityLink activityName', 'potential activityLink geography']
     if 'potential activityLink activityName' not in exchanges_template.columns:
         exchanges_template['potential activityLink activityName'] = ''
         exchanges_template['potential activityLink geography'] = ''
     dfs = [{'df': exchanges_template, 'cols': cols, 'sheetname': 'exchanges'}]
     parameters_template = DataFrame()
     for p in self.parameters:
         to_add = {}
         for attr in gff.list_attributes(p):
             to_add[attr] = getattr(p, attr)
         parameters_template = gff.update_df(parameters_template, 
             {len(parameters_template): to_add})
         if p.name not in set(MD['parameters'].index):
             if 'parameters' not in new_MD:
                 new_MD['parameters'] = DataFrame()
             if len(new_MD['parameters']) == 0 or p.name not in set(new_MD['parameters']['name']):
                 to_add = {'name': p.name, 
                           'defaultVariableName': p.variableName, 
                           'unitName': p.unitName, 
                           'comment': '', 
                           'id': str(uuid4())}
                 new_MD['parameters'] = gff.update_df(new_MD['parameters'], to_add)
     cols = ['name', 'variableName', 'amount', 'unitName', 'uncertaintyType', 'comment']
     cols.extend(pedigree_fields)
     for col in cols:
         if col not in parameters_template.columns:
             parameters_template[col] = ''
     dfs.append({'df': parameters_template, 'cols': cols, 'sheetname': 'parameters'})
     if not gff.isempty(exchanges_template.iloc[0]['original name']):
         filename = exchanges_template.iloc[0]['original name'].split('|')[0]
         for c in ['/']:
             filename = filename.replace(c, '_')
         filename += '.xlsx'
     else:
         1/0
     print 'saving', filename
     gff.write_DataFrame(dfs, folder_template, filename)
     line_for_metainfo_template['exchange template filename'] = filename
     sources = set(exchanges_template['sourceId'])
     if 'publishedSourceId' in set(line_for_metainfo_template.columns) and not gff.isempty(line_for_metainfo_template['publishedSourceId']):
         1/0
         sources.add(line_for_metainfo_template['publishedSourceId'])
     if '' in sources:
         sources.remove('')
     
     if len(sources) > 0:
         if 'Sources' not in new_MD:
             new_MD['Sources'] = DataFrame()
         elif 'id' in list(new_MD['Sources'].columns):
             sources = sources.difference(set(new_MD['Sources']['id']))
     if len(sources) > 0:
         cols = ['sourceType', 'title', 'firstAuthor', 'additionalAuthors', 'year', 
                  'pageNumbers', 'nameOfEditors', 'titleOfAnthology', 
                  'placeOfPublications', 'publisher', 'journal', 'volumeNo', 
                  'issueNo', 'generalComment']
         for s in sources:
             to_add = dict(zip(cols, ['']*len(cols)))
             to_add['id'] = s
             new_MD['Sources'] = gff.update_df(new_MD['Sources'], to_add)
     return (line_for_metainfo_template, exchanges_template, 
             properties_template, new_MD, unit_conflicts)
 def __init__(self, row, group, parameters = None):
     """Build one exchange from a spreadsheet row.

     row -- sequence of cell values; the column layout depends on `group`
         (see the per-group branches below).
     group -- 'ReferenceProduct', 'Emissions to <compartment>',
         'FromEnvironment', 'FromTechnosphere' or 'ByProduct'.
     parameters -- optional DataFrame with 'variableName' and 'amount'
         columns, used to numerically evaluate mathematical relations.
         Defaults to an empty DataFrame.
     """
     if parameters is None:
         # Fresh frame per call; avoids the shared mutable default argument.
         parameters = DataFrame()
     self.group = group
     self.variableName = ''
     self.mathematicalRelation = ''
     self.name = str(row[0])
     self.pedigreeMatrix = []
     self.sourceId = ''
     # Each exchange group has its own column layout in the source sheet.
     if group == 'ReferenceProduct':
         self.unitName = str(row[1])
         self.amount = str(row[2])
         self.uncertaintyType = str(row[4])
         self.comment = str(row[5])
         self.compartment = ''
         self.subcompartment = ''
         self.basicUncertainty = ''
     elif 'Emissions to' in group or group == 'FromEnvironment':
         self.unitName = str(row[2])
         self.amount = str(row[3])
         self.uncertaintyType = str(row[5])
         self.comment = str(row[8])
         if group == 'FromEnvironment':
             self.compartment = 'natural resources'
         else:
             # 'Emissions to air' -> compartment 'air', etc.
             self.compartment = group.replace('Emissions to ', '')
             self.group = 'ToEnvironment'
         self.subcompartment = str(row[1])
         # NOTE(review): row[5] feeds both uncertaintyType and
         # basicUncertainty here -- confirm the column mapping is intended.
         if not gff.isempty(row[5]):
             self.basicUncertainty = float(row[5])
         else:
             self.basicUncertainty = ''
     elif group == 'FromTechnosphere':
         self.unitName = str(row[1])
         self.amount = str(row[2])
         self.uncertaintyType = str(row[4])
         self.comment = str(row[7])
         self.compartment = ''
         self.subcompartment = ''
         if not gff.isempty(row[4]):
             self.basicUncertainty = float(row[4])
         else:
             self.basicUncertainty = ''
     elif group == 'ByProduct':
         self.unitName = str(row[1])
         self.amount = str(row[2])
         self.uncertaintyType = str(row[3])
         self.comment = str(row[7])
         self.compartment = ''
         self.subcompartment = ''
         # NOTE(review): uncertaintyType reads row[3] but basicUncertainty
         # reads row[4] -- verify against the sheet layout.
         if not gff.isempty(row[4]):
             self.basicUncertainty = float(row[4])
         else:
             self.basicUncertainty = ''
     if self.amount.startswith('='):
         # Formula entered in excel, not intended to be in the dataset.
         # startswith() also copes with an empty amount string, which the
         # previous self.amount[0] subscript crashed on (IndexError).
         # SECURITY: eval of spreadsheet content -- only feed trusted files.
         self.amount = eval(parser.expr(self.amount[1:]).compile())
     try:
         self.amount = float(self.amount)
     except ValueError: #it's a mathematicalRelation
         self.mathematicalRelation = str(self.amount).strip()
         to_evaluate = deepcopy(self.mathematicalRelation)
         # Substitute known parameter values into the relation.  The caller
         # is expected to pass `parameters` sorted longest-variableName-first
         # so substring clashes are avoided -- TODO confirm.
         for i in range(len(parameters)):
             sel = parameters.iloc[i]
             to_evaluate = to_evaluate.replace(sel['variableName'], str(sel['amount']))
         try:
             self.amount = eval(parser.expr(to_evaluate).compile())
         except Exception:
             # Narrowed from a bare except: KeyboardInterrupt/SystemExit now
             # propagate; any evaluation failure is flagged for manual fixing.
             self.amount = 'fix mathematicalRelation'
     for field in ['comment', 'mathematicalRelation']:
         # Normalise empty-ish cell values (0 or None) to ''.
         if getattr(self, field) in [0, None]:
             setattr(self, field, '')
     # Old-style pedigree matrices are embedded in the comment as
     # '(a,b,c,d,e)'; extract and validate them.
     if gff.remove_old_pedigree(self.comment) != self.comment:
         t = deepcopy(self.comment)
         self.comment = gff.remove_old_pedigree(self.comment)
         # Whatever was stripped from the comment is the pedigree string.
         self.pedigreeMatrix = t.replace(self.comment, '')
         self.comment = self.comment.strip()
         self.pedigreeMatrix = self.pedigreeMatrix[1:].split(')')[0]
         self.pedigreeMatrix = self.pedigreeMatrix.split(',')
         for i in range(len(self.pedigreeMatrix)):
             self.pedigreeMatrix[i] = int(self.pedigreeMatrix[i])
             if self.pedigreeMatrix[i] > 5 or self.pedigreeMatrix[i] < 1:
                 1/0 # deliberate crash sentinel used throughout this file
     # A UUID inside the comment is taken to be the sourceId.
     uuids = gff.find_uuid(self.comment)
     if len(uuids) > 0:
         if len(uuids) > 1:
             1/0 # crash sentinel: more than one UUID in a comment
         self.sourceId = uuids[0]
         self.comment = self.comment.replace(uuids[0] + '; ', '')
     # NOTE(review): basicUncertainty is only assigned inside the group
     # branches above; an unrecognised group would raise AttributeError
     # here -- confirm callers only pass the five known groups.
     if self.basicUncertainty not in ['', 0., 1, 1.]:
         # Presumably a lognormal geometric SD: variance = ln(b)^2 -- confirm.
         self.variance = np.log(self.basicUncertainty)**2.
     else:
         self.variance = 0.
 def __init__(self, rows, filename, parameters = DataFrame()):
     """Parse a full spreadsheet (list of rows) into metainfo attributes,
     per-group exchange lists and a parameters list.

     rows -- list of row sequences as read from the sheet; section header
         cells in column 0 mark where each exchange group starts.
     filename -- original file name, kept on the instance for reporting.
     parameters -- seed DataFrame of parameters.  NOTE(review): mutable
         default argument; safe only if every call path either passes its
         own frame or gff.update_df returns a new one -- confirm.
     """
     self.filename = filename
     # Record the row index of each known section header.
     # Intermediate (technosphere) exchange sections:
     headers = ['Products', 'Materials/fuels', 
                'Electricity/heat', 'Waste to treatment']
     positions_ie = dict(zip(headers, [0]*len(headers)))
     # Elementary (biosphere) exchange sections:
     headers = ['Resources', 'Emissions to air', 'Emissions to water', 
                'Emissions to soil']
     positions_ee = dict(zip(headers, [0]*len(headers)))
     # Sections this importer expects to find empty:
     headers = ['Avoided products', 'Final waste flows', 'Non material emissions', 
         'Social issues', 'Economic issues']
     positions_should_be_empty = dict(zip(headers, [0]*len(headers)))
     # Single pass over the sheet: harvest metainfo values (the cell just
     # below each metainfo header) and remember all section positions.
     for i in range(len(rows)):
         # headers_metainfo is a module-level mapping defined elsewhere.
         if rows[i][0] in headers_metainfo:
             if rows[i+1][0] == None:
                 setattr(self, headers_metainfo[rows[i][0]], '')
             else:
                 setattr(self, headers_metainfo[rows[i][0]], rows[i+1][0])
         elif rows[i][0] in positions_ie:
             positions_ie[rows[i][0]] = i
         elif rows[i][0] in positions_ee:
             positions_ee[rows[i][0]] = i
         elif rows[i][0] in positions_should_be_empty:
             positions_should_be_empty[rows[i][0]] = i
             if rows[i+1][0] != None:
                 1/0 # crash sentinel: a supposedly empty section has content
         elif rows[i][0] == 'Input parameters':
             InputParametersPosition = i
         elif rows[i][0] == 'Calculated parameters':
             CalculatedParameters = i
     # Read input parameter rows until the first blank row.
     # NOTE(review): raises NameError if the 'Input parameters' header is
     # missing from the sheet -- confirm inputs always contain it.
     for row in rows[InputParametersPosition+1:]:
         if row[0] == None:
             break
         else:
             to_add = {'variableName': str(row[0]), 
                       'amount': float(row[1]), 
                         'uncertaintyType': str(row[2]), 
                         'variance': float(row[3]), 
                         'comment': str(row[9]), 
                         'variableNameLength': len(str(row[0]))}
             parameters = gff.update_df(parameters, {len(parameters): to_add})
     if len(parameters) > 0:
         # Longest variable names first, so substring replacement in
         # mathematical relations matches the longest name.
         # NOTE(review): DataFrame.sort(columns=...) is the pre-0.20 pandas
         # API (removed later); sort_values is the modern equivalent.
         parameters.sort(columns = 'variableNameLength', ascending = False, inplace = True)
     for row in rows[CalculatedParameters+1:]:
         if row[0] == None:
             break
         else:
             1/0 #to fix later if it occurs
     for group in ['ReferenceProduct', 'FromEnvironment', 'ToEnvironment', 
                   'FromTechnosphere', 'ByProduct']:
         setattr(self, group, [])
     # Reference products: rows between 'Products' and 'Avoided products'.
     for i in range(positions_ie['Products']+1, 
             positions_should_be_empty['Avoided products']-1):
         if rows[i][0] != None:
             ref_exc = Exchange(rows[i], 'ReferenceProduct')
             self.ReferenceProduct.append(ref_exc)
     # Technosphere inputs: 'Materials/fuels' up to 'Emissions to air'; the
     # 'Electricity/heat' header row itself is skipped.
     for i in range(positions_ie['Materials/fuels']+1, 
             positions_ee['Emissions to air']-1):
         if rows[i][0] != None and i != positions_ie['Electricity/heat']:
             exc = Exchange(rows[i], 'FromTechnosphere', parameters = parameters)
             self.FromTechnosphere.append(exc)
     # Natural resources: between 'Resources' and 'Materials/fuels'.
     for i in range(positions_ee['Resources']+1, positions_ie['Materials/fuels']-1):
         if not gff.isempty(rows[i][0]):
             exc = Exchange(rows[i], 'FromEnvironment', parameters = parameters)
             self.FromEnvironment.append(exc)
     # Waste-to-treatment rows are parsed as 'ByProduct' exchanges but
     # appended to FromTechnosphere.  NOTE(review): looks intentional for
     # waste flows, but confirm they are not meant for a ByProduct list.
     for i in range(positions_ie['Waste to treatment']+1, 
             InputParametersPosition-1):
         if rows[i][0] != None:
             exc = Exchange(rows[i], 'ByProduct', parameters = parameters)
             self.FromTechnosphere.append(exc)
     # Emissions: one sweep across all 'Emissions to ...' sections; each
     # section header row updates `group` for the data rows that follow
     # (the range starts on a header row, so `group` is set before use).
     for i in range(positions_ee['Emissions to air'], 
                    positions_should_be_empty['Final waste flows']-1):
         if gff.isempty(rows[i][0]):
             pass
         elif rows[i][0] in positions_ee:
             group = rows[i][0]
         else:
             exc = Exchange(rows[i], group, parameters = parameters)
             self.ToEnvironment.append(exc)
     self.parameters = []
     # Wrap each parameter row in a Parameter object, keyed by variableName.
     parameters.set_index('variableName', inplace = True)
     for variableName in parameters.index:
         self.parameters.append(Parameter(parameters.loc[variableName]))