Example #1
0
 def __init__(self, extension, zip_file):
     """
     Load the metadata of an extension and, when it lives in a single file, its records.

     Arguments:
     extension -- handed unchanged to BaseFile.__init__ together with the
                  literal 'extension'.
     zip_file  -- opened archive; handed to BaseFile.__init__ and to
                  common._locateFileInZip to locate the data file.

     After the call:
      - self.warnings holds the messages generated while loading;
      - self.populatedTerms lists the loaded term names ('coreid' first when
        the core id column is not also declared as a field);
      - every name in self.populatedTerms is an attribute of the instance
        holding the list of values of that column, one entry per record.

     A row with fewer values than declared columns raises IndexError.
     """

     # Load the metadata
     BaseFile.__init__(self, extension, 'extension', zip_file)

     # Load the data
     self.warnings = []
     # Created before branching: the defaults section below reads it even
     # when no data file could be loaded (multi-file case).
     self.populatedTerms = []

     # Data in single file
     if len(self.locations) == 1:

         # Read the whole file and release the handle right away
         data_file = common._locateFileInZip(zip_file, self.locations[0])
         try:
             raw_content = data_file.read()
         finally:
             data_file.close()
         content = raw_content.split(self.linesTerminatedBy)[self.ignoreHeaderLines:]

         # Remove last newline (content is empty when the file only holds header lines)
         if content and content[-1] == '':
             content = content[:-1]

         # Build populatedTerms and main infrastructure
         if self.coreid.index not in self.fields.keys():
             self.populatedTerms.append('coreid')
         for index in sorted(self.fields.keys()):
             term = self.fields[index].term.split("/")[-1].encode(self.encoding)
             self.populatedTerms.append(term)
         for term in self.populatedTerms:
             setattr(self, term, [])

         # Load the content
         enclosure = self.fieldsEnclosedBy
         for line in content:
             # Remove trailing newline (endswith is safe on empty lines)
             if line.endswith("\n"):
                 line = line[:-1]
             splitline = line.split(self.fieldsTerminatedBy)
             for pos, thiskey in enumerate(self.populatedTerms):
                 thisvalue = splitline[pos]
                 # Remove field enclosing character, if present. Empty values
                 # are left alone: they have no first/last character to test.
                 if enclosure != '' and thisvalue and thisvalue[0] == enclosure and thisvalue[-1] == enclosure:
                     thisvalue = thisvalue[1:-1]
                 getattr(self, thiskey).append(thisvalue)

     # Data in more than one file
     else:
         #TODO
         self.warnings.append("Sorry, multi-file loading of extension data is not yet supported. Basic metadata has been parsed but no actual record has been processed.")

     # Load the defaults
     for default in self.defaults:
         term = default.term.split("/")[-1].encode(self.encoding)
         value = default.default.encode(self.encoding)
         if term in self.populatedTerms:
             self.warnings.append("'{0}' appears in the core file and as a default field in the metafile. Ignoring the default value.".format(term))
             continue
         # Register the term before asking for the record count, as the
         # original code did (countRecords is defined outside this block).
         setattr(self, term, [])
         self.populatedTerms.append(term)
         getattr(self, term).extend([value] * self.countRecords())
    def _needsMetafile(self, z, dwca_path):
        """
Checks if the DarwinCore Archive should have a meta.xml file. Cases when meta.xml is NOT needed:
 - One and only one core file, with first row containing headers that correspond to DarwinCore terms, without extensions or metadata file
 - Same as above, but with metadata file called EML.xml. If name is other than EML.xml, metafile is needed
"""
        needsMetafile = False
        headers = None
        
        # List of files other than 'eml.xml' and 'meta.xml'
        list_of_files = [z.namelist()[i] for i in list(range(len(z.namelist()))) if z.namelist()[i] != 'meta.xml' and z.namelist()[i] != 'eml.xml'] if dwca_path.endswith('.zip') else [z.getnames()[i] for i in list(range(len(z.getnames()))) if z.getnames()[i] != 'meta.xml' and z.getnames()[i] != 'eml.xml']

        # If there is more than one file left, metafile is needed        
        if len(list_of_files) > 1:
            needsMetafile = True
        
        # If there is another .xml file, metafile is needed
        elif list_of_files[0].endswith('.xml'):
            needsMetafile = True

        # If first column in single file has terms different than DarwinCore Terms, metafile is needed
        else:
            # Extract first line
            o = common._locateFileInZip(z, list_of_files[0])
            headers_unparsed = o.readline()
            o.close()
            headers = None
            # Split by tabs if txt file
            if list_of_files[0].endswith('.txt'):
                self.fieldsTerminatedBy = "\t"
                self.fieldsEnclosedBy = ""
                headers = headers_unparsed.split("\t")
            else:
                # Split by comma if comma present and semicolon not present
                if "," in headers_unparsed and not ";" in headers_unparsed:
                    self.fieldsTerminatedBy = ","
                    self.fieldsEnclosedBy = "\""
                    headers = headers_unparsed.split(",")
                # Split by semicolon if semicolon present and comma not present
                elif ";" in headers_unparsed and not "," in headers_unparsed:
                    self.fieldsTerminatedBy = ";"
                    self.fieldsEnclosedBy = "\""
                    headers = headers_unparsed.split(";")
            # If separator is not tab, comma or semicolon, metafile is needed
            if headers is None or len (headers) == 1:
                needsMetafile = True
            else:
                # If a single term is not a DarwinCore Term, metafile is needed
                for i in headers:
                    if i.rstrip() not in self.dwcTerms:
                        needsMetafile = True
                        break
        
        return needsMetafile, list_of_files, headers
Example #3
0
 def __init__(self, zip_file):
     """
     Parse the meta.xml of an archive.

     Arguments:
     zip_file -- opened archive, searched for 'meta.xml' through
                 common._locateFileInZip and handed to the _parse* methods.

     Raises common.MetafileError when the archive holds no meta.xml.

     After the call self.core holds the result of _parseCore; whatever
     _parseMain and _parseExtensions store is defined by those methods.
     """

     metafile_content = common._locateFileInZip(zip_file, 'meta.xml')
     if metafile_content is None:
         raise common.MetafileError('Could not find meta.xml.')

     # minidom.parse builds the whole document in memory, so the archive
     # member can be released as soon as parsing ends (or fails)
     try:
         metafile = minidom.parse(metafile_content)
     finally:
         metafile_content.close()

     self._parseMain(metafile, zip_file)
     self.core = self._parseCore(metafile, zip_file)
     self._parseExtensions(metafile, zip_file)
    def __init__(self, dwca_path):
        """
        Initialization function for the DarwinCoreArchive class.
        Argument must be the path to a valid DarwinCore archive.

        Example:
        dwca = DarwinCoreArchive('/path/to/DarwinCore/archive.zip')

        Steps:
         1. open the archive and decide whether a meta.xml is required;
         2. take the parsing settings from the metafile when there is one,
            otherwise use defaults plus the separators that _needsMetafile
            detected;
         3. build self.populatedTerms from the metafile fields or from the
            header row;
         4. load the records of a single core file into the `values` list of
            the metafile's id/field objects and expose each known term as an
            attribute of the instance;
         5. apply the default values declared in the metafile;
         6. print a short summary.

        Problems that do not stop the load are stored in self.warnings.
        A row with fewer values than declared columns raises IndexError.
        """

        #----------------------------------------------------------------------#
        # INITIALIZATION
        #----------------------------------------------------------------------#

        # Create a container for loading warnings
        self.warnings = []

        # Create container for flags
        self.flags = {}

        # All DarwinCore Terms and URIs. Every URI is its namespace followed
        # by the term name, so the mapping is built from the names alone.
        dwc_namespace = 'http://rs.tdwg.org/dwc/terms/'
        dc_namespace = 'http://purl.org/dc/terms/'
        dwc_names = (
            'identificationRemarks', 'minimumDepthInMeters', 'footprintSRS',
            'verbatimLatitude', 'month', 'measurementDeterminedDate',
            'informationWithheld', 'lithostratigraphicTerms',
            'latestPeriodOrHighestSystem', 'reproductiveCondition',
            'continent', 'endDayOfYear', 'identificationID',
            'latestEraOrHighestErathem', 'occurrenceID',
            'locationAccordingTo', 'latestEpochOrHighestSeries',
            'coordinateUncertaintyInMeters', 'coordinatePrecision',
            'maximumDepthInMeters', 'waterBody', 'resourceRelationshipID',
            'kingdom', 'decimalLatitude', 'verbatimTaxonRank',
            'earliestEraOrLowestErathem', 'verbatimCoordinates',
            'acceptedNameUsageID', 'infraspecificEpithet', 'namePublishedIn',
            'originalNameUsage', 'nameAccordingToID', 'dataGeneralizations',
            'nomenclaturalStatus', 'recordNumber', 'day', 'individualCount',
            'measurementType', 'institutionID',
            'georeferenceVerificationStatus', 'lifeStage', 'measurementUnit',
            'locationRemarks', 'scientificName', 'parentNameUsage',
            'datasetID', 'eventID', 'lowestBiostratigraphicZone', 'habitat',
            'higherGeographyID', 'sex', 'scientificNameAuthorship',
            'associatedTaxa', 'year', 'taxonRemarks', 'namePublishedInYear',
            'identificationVerificationStatus', 'eventTime', 'basisOfRecord',
            'latestEonOrHighestEonothem', 'otherCatalogNumbers',
            'georeferenceRemarks', 'acceptedNameUsage', 'georeferenceSources',
            'specificEpithet', 'verbatimLocality', 'identificationReferences',
            'measurementRemarks', 'georeferencedBy', 'geodeticDatum',
            'occurrenceRemarks', 'collectionCode', 'higherGeography',
            'nameAccordingTo', 'latestAgeOrHighestStage', 'fieldNumber',
            'measurementMethod', 'disposition', 'earliestEpochOrLowestSeries',
            'group', 'highestBiostratigraphicZone', 'ownerInstitutionCode',
            'scientificNameID', 'relationshipEstablishedDate',
            'earliestAgeOrLowestStage', 'country', 'measurementDeterminedBy',
            'decimalLongitude', 'locationID', 'relationshipRemarks',
            'startDayOfYear', 'formation', 'genus', 'family', 'collectionID',
            'dynamicProperties', 'eventRemarks', 'municipality',
            'individualID', 'footprintWKT', 'county', 'associatedMedia',
            'associatedSequences', 'subgenus', 'footprintSpatialFit',
            'measurementValue', 'higherClassification', 'islandGroup',
            'resourceID', 'class', 'verbatimSRS', 'associatedOccurrences',
            'catalogNumber', 'verbatimLongitude', 'preparations', 'taxonID',
            'nomenclaturalCode', 'maximumElevationInMeters',
            'verbatimCoordinateSystem', 'measurementID', 'relatedResourceID',
            'datasetName', 'earliestEonOrLowestEonothem',
            'measurementAccuracy', 'verbatimDepth', 'bed',
            'georeferencedDate', 'behavior', 'island', 'parentNameUsageID',
            'minimumElevationInMeters', 'occurrenceStatus', 'vernacularName',
            'pointRadiusSpatialFit', 'countryCode', 'phylum',
            'institutionCode', 'identificationQualifier', 'namePublishedInID',
            'identifiedBy', 'earliestPeriodOrLowestSystem',
            'minimumDistanceAboveSurfaceInMeters',
            'maximumDistanceAboveSurfaceInMeters', 'taxonConceptID',
            'georeferenceProtocol', 'locality', 'associatedReferences',
            'stateProvince', 'taxonomicStatus', 'relationshipAccordingTo',
            'member', 'relationshipOfResource', 'taxonRank',
            'previousIdentifications', 'samplingEffort', 'verbatimElevation',
            'establishmentMeans', 'typeStatus', 'samplingProtocol',
            'originalNameUsageID', 'eventDate', 'geologicalContextID',
            'fieldNotes', 'dateIdentified', 'verbatimEventDate', 'recordedBy',
            'order',
        )
        dc_names = (
            'bibliographicCitation', 'type', 'references', 'accessRights',
            'rightsHolder', 'rights', 'language', 'modified',
        )
        self.dwcTerms = dict((name, dwc_namespace + name) for name in dwc_names)
        self.dwcTerms.update((name, dc_namespace + name) for name in dc_names)

        # Parse and load the compressed file, either a .zip or a .tar.gz
        z = self._loadCompressedFile(dwca_path)

        # Parse the need for a meta.xml
        needsMetafile, list_of_files, headers = self._needsMetafile(z, dwca_path)

        #----------------------------------------------------------------------#
        # METAFILE
        #----------------------------------------------------------------------#

        # Even if there is no need for a metafile, if there IS a metafile, take advantage of it
        if needsMetafile or common._locateFileInZip(z, 'meta.xml'):
            self.metafile = Metafile(z)

            # Wrapper for certain key attributes
            self.core = self.metafile.core
            self.extensions = self.metafile.extensions
            self.locations = self.core.locations

            # Core field parsing elements
            self.linesTerminatedBy = self.core.linesTerminatedBy
            self.fieldsTerminatedBy = self.core.fieldsTerminatedBy
            self.fieldsEnclosedBy = self.core.fieldsEnclosedBy
            self.ignoreHeaderLines = self.core.ignoreHeaderLines
            self.rowType = self.core.rowType
            self.encoding = self.core.encoding
            self.dateFormat = self.core.dateFormat

        else:
            self.metafile = None
            self.extensions = []
            self.locations = list_of_files

            # Load defaults except fieldsTerminatedBy and fieldsEnclosedBy, which depend
            # on the file type and were already set by _needsMetafile
            self.linesTerminatedBy = "\n"
            self.ignoreHeaderLines = 1
            self.rowType = "http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord"
            self.encoding = "UTF-8"
            self.dateFormat = "YYYY-MM-DD"

        # Build populatedTerms
        self.populatedTerms = []
        # (column index, holder) for every column declared in the metafile
        loaders = []
        # (attribute name, holder) for every column exposed on the instance
        shortcuts = []

        # If metafile is present, take values from it
        if self.metafile:
            core = self.metafile.core
            term_by_uri = dict((uri, term) for term, uri in self.dwcTerms.items())

            # If field with index = 0 exists, don't add id; else, add id
            if 0 not in core.fields.keys() and core.id.index == 0:
                self.populatedTerms.append('id')
                loaders.append((0, core.id))
                shortcuts.append(('id', core.id))

            for index in sorted(core.fields.keys()):
                field = core.fields[index]
                # Every declared field is loaded from its own column...
                loaders.append((index, field))
                # ...but only DarwinCore terms get a name on the instance.
                # Pairing name and field here keeps them aligned even when
                # an unknown term sits between two known ones.
                term = term_by_uri.get(field.term)
                if term is not None:
                    self.populatedTerms.append(str(term))
                    shortcuts.append((str(term), field))

        # If not, build populatedTerms from first row
        else:
            # If meta.xml is not needed, headers are already parsed in the needsMetafile section
            for header in headers:
                self.populatedTerms.append(header.rstrip())

        #----------------------------------------------------------------------#
        # METADATA
        #----------------------------------------------------------------------#

        #TODO

        #----------------------------------------------------------------------#
        # CONTENT
        #----------------------------------------------------------------------#

        # Terms that could not be matched against the DarwinCore terms.
        # Defined before branching because it is read after the load whatever
        # branch ran; nothing fills it at the moment.
        warnings = []

        # Number of records loaded from the core file
        record_count = 0

        # If locations contains only one file
        if len(self.locations) == 1:

            if self.metafile:
                # Assessment of the core file is made when loading metafile
                content = common._locateFileInZip(z, self.locations[0])
                try:
                    raw_content = content.read()
                finally:
                    content.close()

                occlines = raw_content.split(self.linesTerminatedBy)[self.ignoreHeaderLines:]
                # Remove last newline (occlines is empty when the file only holds header lines)
                if occlines and occlines[-1] == '':
                    occlines = occlines[:-1]

                # Main process: values are stored in the metafile's id/field objects
                enclosure = self.fieldsEnclosedBy
                for line in occlines:
                    # endswith is safe on empty lines
                    if line.endswith("\n"):
                        line = line[:-1]
                    splitline = line.split(self.fieldsTerminatedBy)
                    for index, holder in loaders:
                        thisvalue = splitline[index]
                        # Remove the enclosing characters if present. Empty values
                        # are left alone: they have no first/last character to test.
                        if enclosure != '' and thisvalue and thisvalue[0] == enclosure and thisvalue[-1] == enclosure:
                            thisvalue = thisvalue[1:-1]
                        holder.values.append(thisvalue)
                record_count = len(occlines)

                # Shortcut to the values
                for name, holder in shortcuts:
                    setattr(self, name, holder)

            else:
                # NOTE(review): records are stored in the metafile's field
                # objects, so there is nowhere to put them without a meta.xml.
                # This path used to fail with AttributeError on self.metafile.
                self.warnings.append("Sorry, loading core data without a meta.xml file is not yet supported. Basic metadata has been parsed but no actual record has been processed.")

        # If locations contains more than one file
        else:
            #TODO
            self.warnings.append("Sorry, multi-file loading of core data is not yet supported. Basic metadata has been parsed but no actual record has been processed.")

        # Defaults
        if self.metafile:
            for default in self.metafile.core.defaults:
                term = str(default.term.split("/")[-1])
                value = str(default.default)
                if term in self.populatedTerms:
                    self.warnings.append("'{0}' appears in the core file and as a default field in the metafile. Ignoring the default value.".format(term))
                    continue
                self.populatedTerms.append(term)
                # One copy of the default per loaded record
                default.values = [value] * record_count
                setattr(self, term, default)

        # Print warnings
        for unknown_term in warnings:
            self.warnings.append("'{0}' cannot be found in the list of DarwinCore terms. The use of rename or batchRename functions is suggested.".format(unknown_term))

        #----------------------------------------------------------------------#
        # END
        #----------------------------------------------------------------------#

        # Print stats from the import. print is called with a single
        # parenthesised argument so the line is valid on Python 2 and 3.

        # Total number of records imported
        print("{0} records imported from main file/s".format(record_count))

        # Extensions loaded (self.extensions is an empty list when there is no metafile)
        if len(self.extensions) > 0:
            print("{0} extensions loaded: {1}".format(len(self.extensions), ", ".join(self.extensions.keys())))

        # Heads up for warnings
        if len(self.warnings) > 0:
            print("IMPORTANT: some warning messages have been stored. To see them, call the function showWarnings()")