Ejemplo n.º 1
0
import os, sys
# Put the parent directory at the front of sys.path (once) before the
# `model.location` import below.
# NOTE(review): abspath("..") is resolved against the current working
# directory, not this file's location - presumably the script is meant to be
# run from its own folder; confirm.
parentPath = os.path.abspath("..")
if parentPath not in sys.path:
    sys.path.insert(0, parentPath)

from model.location import Location


def test(expect, output):
    """Print ``OK`` when the two values compare equal, else an ``ERR`` line.

    The ``ERR`` line shows both values so a failing check can be diagnosed
    from the console output alone.
    """
    if not expect == output:
        print("ERR", expect, " ", output)
        return
    print("OK")


# Smoke checks for Location. Each case pairs a deferred call with the value
# it is compared against; calls are deferred so that every check is evaluated
# and reported in turn, exactly as a flat list of statements would be.
l = Location(0.5)
cases = (
    (lambda: l.isNorm('a'), True),
    (lambda: l.isNorm(' '), True),
    (lambda: l.isNorm('1'), True),
    (lambda: l.isNorm(')'), False),
    (lambda: l.normalize('It is a good day.'), 'It is a good day'),
    (lambda: l.getLoc(['It is a good day.']), 'it is a good day'),
    (lambda: l.isFuzzyEqual('It is a good day.', 'It is a good night.'), 1.0),
    (lambda: l.isFuzzyEqual('It is a good day.', 'It is a bad night.'), 0.0),
    (lambda: l.VangerFisher('Morskaya', 'Morskoy'), True),
    (lambda: l.VangerFisher('Morskaya', 'Mars'), False),
)
for run, reference in cases:
    test(run(), reference)
Ejemplo n.º 2
0
class Extractor:
    def __init__(self, config):
        """Create an extractor with all output sections disabled and empty results.

        Args:
            config: settings object stored as ``self.config``; other methods
                read attributes such as ``fileCodec`` from it.
        """
        # Page range used by extractRange(); -1 means "not set".
        self.pStart = -1
        self.pEnd = -1

        # Flags selecting which sections are extracted / written.
        self.metaTitle = False
        self.metaContent = False
        self.metaName = False
        self.metaLocation = False
        self.metaKeyWord = False
        self.metaRef = False
        self.metaOrg = False
        self.metaMisc = False
        self.metaAll = False

        self.config = config
        self.INFilename = 'in.pdf'
        self.OUTFilename = 'out.txt'
        self.typeOut = "txt"
        self.pdf = None

        # Descriptive fields written by the save* methods.
        self.title = ''
        self.origin = ''
        self.descriptAbstract = ''
        self.descriptPurpose = ''
        self.descriptSupplemental = ''
        self.dateBegin = ''
        self.dateEnd = ''
        self.statusProgress = ''
        self.statusUpdate = ''
        self.access = ''

        # Contact record; built once (it was previously constructed twice).
        self.contact = DataPerson('')

        # Identifier handling: extractTitle() fills ``uuid`` when genUUID is set.
        self.genUUID = True
        self.uuid = ""

        # Result containers filled by the extract* / add* methods.
        self.refs = []
        self.namesData = []
        self.keywordsData = []
        self.keywordsLocData = []
        self.locationsData = []
        self.miscData = []
        self.orgData = []
        self.locData = []

    def reinit():
        self.namesData = []
        self.keywordsData = []
        self.locationsData = []

    def addName(self):
        """Append a new ``DataPerson`` built from an empty string to ``namesData``."""
        blank = DataPerson('')
        self.namesData.append(blank)

    def addKeyword(self):
        """Append a new ``DataKeyword`` built from an empty string to ``keywordsData``."""
        blank = DataKeyword('')
        self.keywordsData.append(blank)

    def addKeywordLoc(self):
        """Append a new ``DataKeyword`` built from an empty string to ``keywordsLocData``."""
        blank = DataKeyword('')
        self.keywordsLocData.append(blank)

    def addLocation(self):
        """Append a new ``DataLocation`` built from an empty string to ``locationsData``."""
        blank = DataLocation('')
        self.locationsData.append(blank)

    def addReference(self):
        """Append a new ``DataRef`` built from an empty string to ``refs``."""
        blank = DataRef('')
        self.refs.append(blank)

    def delName(self, id):
        self.namesData[id - 1:id] = []

    def delKeyword(self, id):
        self.keywordsData[id - 1:id] = []

    def delKeywordLoc(self, id):
        self.keywordsLocData[id - 1:id] = []

    def delLocation(self, id):
        self.locationsData[id - 1:id] = []

    def delReference(self, id):
        self.locationsData[id - 1:id] = []

    def extractRange(self):
        """Extract the enabled sections from pages ``pStart``..``pEnd`` and save them as text.

        The PDF named by ``INFilename`` is converted, the requested page range
        is read, the enabled extractors are run, and a plain-text report is
        written to ``OUTFilename`` through ``saveFile``. ``typeOut`` is forced
        to ``"txt"``; when ``pEnd`` is -1 it is set to ``pStart``.
        """
        self.typeOut = "txt"
        self.pdf = PDFContainer(format=self.config.outPDFFormat,
                                codec=self.config.fileCodec)
        if self.pdf.format == "filter":
            convert = self.pdf.convertPDFFilter
        else:
            convert = self.pdf.convertPDFAlternative
        convert(self.INFilename)
        self.tokenizer = nltk.data.load(self.config.sentencesSplitterModel)
        self.extractorNer = NERExtractor(self.config)
        self.extractorLoc = Location(self.config.minTanimoto)

        # An unset end page means "single page".
        if self.pEnd == -1:
            self.pEnd = self.pStart

        text = self.pdf.getPages(self.pStart, self.pEnd)
        # NER runs line by line; the sentence tokenizer is used only for the
        # coordinate-based location pass further down.
        lines = text.split('\n')

        wantNames = self.metaName or self.metaAll
        wantLocations = self.metaLocation or self.metaAll
        wantOrgs = self.metaOrg or self.metaAll
        wantMisc = self.metaMisc or self.metaAll
        wantKeywords = self.metaKeyWord or self.metaAll
        wantRefs = self.metaRef or self.metaAll

        # (enabled?, NER tags, list receiving the raw phrases)
        nerTargets = (
            (wantNames, ["I-PER", "B-PER"], self.namesData),
            (wantLocations, ["I-LOC", "B-LOC"], self.locData),
            (wantOrgs, ["I-ORG", "B-ORG"], self.orgData),
            (wantMisc, ["I-MISC", "B-MISC"], self.miscData),
        )
        for enabled, tags, target in nerTargets:
            if enabled:
                for phrase in self.extractTags(lines, tags):
                    target.append(phrase)

        if wantLocations:
            # locations with coordinates
            self.extractLocation(self.tokenizer.tokenize(text))
        if wantKeywords:
            self.extractKeyWords(text)
        if wantRefs:
            self.extractRefs(text)

        # Build the report.
        print(self.OUTFilename + ' - OUT_FILE')
        out = []
        if wantNames:
            out.append('NAMES\n')
            for entry in self.namesData:
                out.append(entry + '\n')
        if wantLocations:
            out.append('LOCATIONS\n')
            for entry in self.locationsData:
                out.append(entry.genText() + '\n')
            out.append('OTHER LOCATIONS\n')
            for entry in self.locData:
                out.append(entry + '\n')
        if wantOrgs:
            out.append('ORGANISATION\n')
            for entry in self.orgData:
                out.append(entry + '\n')
        if wantMisc:
            out.append('MISC\n')
            for entry in self.miscData:
                out.append(entry + '\n')
        if wantKeywords:
            out.append('KEY WORDS\n')
            written = 0
            for entry in self.keywordsData:
                phrase = entry.genText()
                if written >= self.config.countKeyPhrases:
                    break
                tooLong = len(phrase.split()) > self.config.maxKeyPhraseLength
                if tooLong or len(phrase) < 4:
                    continue
                out.append(phrase + '\n')
                written += 1
            for entry in self.keywordsLocData:
                out.append(entry.genText() + '\n')
        if wantRefs:
            out.append('REFS\n')
            for entry in self.refs:
                out.append(entry.genText() + '\n')
        self.saveFile(self.OUTFilename, "".join(out))

    def extract(self):
        """Run the full extraction pipeline on ``INFilename``.

        Converts the PDF, then extracts the title (pages 0-3), names
        (pages 0-10), and locations, keywords and references (all pages),
        storing results on the instance.
        """
        self.pdf = PDFContainer(format=self.config.outPDFFormat,
                                codec=self.config.fileCodec)
        if self.pdf.format == "filter":
            convert = self.pdf.convertPDFFilter
        else:
            convert = self.pdf.convertPDFAlternative
        convert(self.INFilename)

        self.tokenizer = nltk.data.load(self.config.sentencesSplitterModel)
        self.extractorNer = NERExtractor(self.config)
        self.extractorLoc = Location(self.config.minTanimoto)

        # title: first pages only
        self.extractTitle(self.pdf.getPages(0, 3))

        # names: first pages, split per line rather than per sentence
        frontMatter = self.pdf.getPages(0, 10)
        self.extractName(frontMatter.split('\n'))

        # locations with coordinates, keywords and references: whole document
        fullText = self.pdf.getAllPages()
        self.extractLocation(self.tokenizer.tokenize(fullText))
        self.extractKeyWords(fullText)
        self.extractRefs(fullText)

    def save(self):
        print(self.typeOut)
        if self.typeOut == 'txt':
            text = self.saveToTXT()
            self.saveFile(self.OUTFilename, text)
        elif self.typeOut == 'iso19115v2':
            text = self.saveToISO19115v2()
            self.saveFile(self.OUTFilename, text)
        elif self.typeOut == 'fgdc':
            text = self.saveToFGDC()
            self.saveFile(self.OUTFilename, text)
        elif self.typeOut == 'dublin':
            self.saveToDublin()

    def load(self):
        print(self.typeOut)
        if self.typeOut == 'iso19115v2':
            text = self.saveToISO19115v2()
            code, ans = insertData(self.config.protocol, self.config.url,
                                   self.config.user, self.config.passwd, text)
        elif self.typeOut == 'fgdc':
            text = self.saveToFGDC()
            code, ans = insertData(self.config.protocol, self.config.url,
                                   self.config.user, self.config.passwd, text)
        return code

    def loadFromFile(self, infile):
        """Upload an existing metadata file via ``insertDataFromFile``.

        Returns:
            The first element of the pair returned by ``insertDataFromFile``.
        """
        print(infile)
        cfg = self.config
        status, _response = insertDataFromFile(cfg.protocol, cfg.url,
                                               cfg.user, cfg.passwd, infile)
        return status

    def extractRefs(self, txt):
        """Run ``ExtracrReference`` on ``txt`` and append each result to ``refs`` as a ``DataRef``."""
        found = ExtracrReference(txt).extract()
        for reference in found:
            self.refs.append(DataRef(reference))

    def extractTitle(self, txt):
        """Set ``title`` from ``txt`` and derive the date fields from it.

        ``dateBegin`` becomes the first run of four digits found in the title
        (empty string if none) and ``dateEnd`` is set to ``"present"``. A new
        identifier is generated when ``genUUID`` is set.
        """
        # The bare name resolves to the module-level extractTitle function,
        # not to this method.
        self.title = extractTitle(txt)
        yearMatch = re.search(r'[0-9]{4}', self.title)
        self.dateBegin = "" if yearMatch is None else yearMatch.group(0)
        self.dateEnd = "present"
        if self.genUUID:
            self.uuid = self.genIdentifier()

    def extractName(self, sentences):
        """Collect person names from ``sentences`` into ``namesData``.

        Phrases tagged as persons are taken from ``extractTags``; any phrase
        containing a digit, slash, parenthesis or one of a few institutional
        words is discarded. The rest are stored as ``DataPerson`` objects.
        """
        rejectPattern = r'[/0-9()]|(University)|(Database)|(Ecology)|(No\.)'
        flags = re.IGNORECASE | re.UNICODE
        accepted = [
            phrase
            for phrase in self.extractTags(sentences, ["I-PER", "B-PER"])
            if re.search(rejectPattern, phrase, flags) is None
        ]
        for phrase in accepted:
            self.namesData.append(DataPerson(phrase))

    def extractTags(self, sentences, tags):
        names = set()
        for sentence in sentences:
            wordsRaw, preds = self.extractorNer.extractFromSentence(sentence)
            test = False
            res = ''
            for i, w in enumerate(wordsRaw):
                #STXTfile.write(w + ' - ' + preds[i] + '\n')
                if preds[
                        i] in tags:  #preds[i] == "I-PER" or preds[i] == "B-PER":
                    #if i > 0 and (preds[i-1] == "I-LOC" or preds[i-1] == "B-LOC" or preds[i-1] == "I-ORG" or preds[i-1] == "B-ORG"):
                    #	res += wordsRaw[i-1] + ' '
                    res += w + ' '
                    continue
                else:
                    if res != '':
                        res = res.strip()
                        ress = res.split(',')
                        for r in ress:
                            r = r.strip()
                            if len(r.split(' ')) > 1:
                                names.add(r.strip())
                        res = ""
            if res != '':
                res = res.strip()
                ress = res.split(',')
                for r in ress:
                    r = r.strip()
                    if len(r.split(' ')) > 1:
                        names.add(r.strip())
        return names

    def extractLocation(self, sents):
        """Find known locations in ``sents`` and record them with their coordinates.

        Only sentences containing one of ``refWords`` (case-insensitive) are
        examined. In those, runs of location-tagged words (with an embedded
        'of' allowed) are fuzzy-matched against the keys of the Coves, Seas,
        Bays and Islands tables. Each hit is stored as a ``DataLocation``
        ("key+coord+coord+...\\n") in ``locationsData`` and as a
        ``DataKeyword`` (the key alone) in ``keywordsLocData``.

        A run that reaches the end of a sentence is now matched too;
        previously it was silently dropped.
        """
        locmap = {}
        for table in (Coves, Seas, Bays, Islands):
            locmap.update(table)

        # Keep only the sentences that mention a reference word.
        candidates = []
        for s in sents:
            lowered = s.lower()
            if any(w in lowered for w in refWords):
                candidates.append(s)

        locations = set()

        def matchPhrase(phrase):
            # Record the first table key that fuzzily equals the phrase.
            phrase = phrase.strip()
            if phrase == '':
                return
            for key in locmap:
                if self.extractorLoc.isFuzzyEqual(phrase, key, 3):
                    coords = ''.join(str(x) + '+' for x in locmap[key])
                    locations.add(key + '+' + coords + '\n')
                    break

        for sentence in candidates:
            wordsRaw, preds = self.extractorNer.extractFromSentence(sentence)
            phrase = ''
            for i, w in enumerate(wordsRaw):
                isLoc = preds[i] == "I-LOC" or preds[i] == "B-LOC"
                # 'of' only continues a phrase that has already started.
                if isLoc or (w == 'of' and phrase != ''):
                    phrase += w + ' '
                    continue
                matchPhrase(phrase)
                phrase = ''
            # Flush a phrase that ends the sentence.
            matchPhrase(phrase)

        for s in locations:
            self.locationsData.append(DataLocation(s))
        for s in locations:
            self.keywordsLocData.append(DataKeyword(s.split('+')[0]))

    def extractKeyWords(self, txt):
        """Extract ranked key phrases from ``txt`` into ``keywordsData``.

        Stop words are read from ``config.stopWords`` (one per line). At most
        ``config.countKeyPhrases`` phrases are kept; phrases longer than
        ``config.maxKeyPhraseLength`` words, shorter than 4 characters, or
        containing ``[``, ``]``, ``+`` or ``*`` are skipped. Each kept phrase
        is stored as it is spelled at its first case-insensitive occurrence
        in ``txt``.
        """
        with open(self.config.stopWords, encoding=self.config.fileCodec) as f:
            # rstrip('\n') instead of dropping the last character: a final
            # line without a newline must not lose its last letter.
            stopwords = [line.rstrip('\n') for line in f]

        ke = KeywordExtractor(stopwords=stopwords,
                              punctuations=self.config.punctuations)
        ke.extractKeyWords(txt)
        ranked = ke.getRankedPhrases()

        flags = re.IGNORECASE | re.UNICODE
        keys = []
        for w in ranked:
            if len(keys) >= self.config.countKeyPhrases:
                break
            if len(w.split()) > self.config.maxKeyPhraseLength or len(w) < 4:
                continue
            if re.search(r'[\[\]+*]', w):
                continue
            # The phrase is literal text, not a pattern: escape it so
            # characters such as '(' or '?' cannot raise re.error or match
            # something else.
            found = re.search(re.escape(w), txt, flags)
            if found:
                keys.append(found.group(0))
        for k in keys:
            self.keywordsData.append(DataKeyword(k))

    def genIdentifier(self):
        res = ''
        for i in range(0, 8):
            res += random.choice(string.ascii_letters + "0123456789")
        res += "-"
        for i in range(0, 4):
            res += random.choice(string.ascii_letters + "0123456789")
        res += "-"
        for i in range(0, 4):
            res += random.choice(string.ascii_letters + "0123456789")
        res += "-"
        for i in range(0, 4):
            res += random.choice(string.ascii_letters + "0123456789")
        res += "-"
        for i in range(0, 12):
            res += random.choice(string.ascii_letters + "0123456789")
        return res

    def saveToISO19115v2(self):
        print(self.OUTFilename + ' - OUT_FILE')

        year = datetime.date.today().year
        month = datetime.date.today().month
        day = datetime.date.today().day

        #<gmi:MI_Metadata xmlns:gmi="http://www.isotc211.org/2005/gmi" xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:gsr="http://www.isotc211.org/2005/gsr" xmlns:gss="http://www.isotc211.org/2005/gss" xmlns:gst="http://www.isotc211.org/2005/gst" xmlns:gmx="http://www.isotc211.org/2005/gmx" xmlns:gfc="http://www.isotc211.org/2005/gfc" xmlns:srv="http://www.isotc211.org/2005/srv" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.isotc211.org/2005/gmi ftp://ftp.ncddc.noaa.gov/pub/Metadata/Online_ISO_Training/Intro_to_ISO/schemas/ISObio/schema.xsd">
        res = """
		<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd"
			xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
			xmlns:gco="http://www.isotc211.org/2005/gco"
			xmlns:gml="http://www.opengis.net/gml"
			xsi:schemaLocation="http://www.isotc211.org/2005/gmd ../schema.xsd">
		<fileIdentifier>
			<gco:CharacterString>""" + self.uuid + """</gco:CharacterString>
		</fileIdentifier>
		<gmd:language>
			<gco:CharacterString>eng</gco:CharacterString>
		</gmd:language>
		<gmd:characterSet>
			<gmd:MD_CharacterSetCode codeListValue="utf8"
				codeList="http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/codelist/ML_gmxCodelists.xml#MD_CharacterSetCode"/>
		</gmd:characterSet>

		<gmd:hierarchyLevel>
			gmd:<MD_ScopeCode codeList="http://www.isotc211.org/2005/resources/codeList.xml#MD_ScopeCode"
				codeListValue="dataset"/>
		</gmd:hierarchyLevel>
		<gmd:hierarchyLevelName>
			<gco:CharacterString>dataset</gco:CharacterString>
		</gmd:hierarchyLevelName>

		<gmd:dateStamp>
			<gco:DateTime>""" + str(year) + '-' + str(month) + '-' + str(
            day) + """</gco:DateTime>
		</gmd:dateStamp>

		<gmd:metadataStandardName>
			<gco:CharacterString>ISO 19115:2003/19139</gco:CharacterString>
		</gmd:metadataStandardName>
		<gmd:metadataStandardVersion>
			<gco:CharacterString>1.0</gco:CharacterString>
		</gmd:metadataStandardVersion>

		
		"""

        #res += """
        #<gmd:abstract>
        #	<gco:CharacterString>""" + self.descriptAbstract + """</gco:CharacterString>
        #</gmd:abstract>
        #"""

        # Contact
        res += "<gmd:contact>"
        res += self.contact.genISO115v2()
        res += "</gmd:contact>"

        if self.metaTitle or self.metaAll:
            _d = re.search(r'[0-9]{4}', self.title)
            if _d is None:
                _date = ""
            else:
                _date = _d.group(0)
            _title = self.title.replace('\n', ' ')
            res += """
		<gmd:identificationInfo>
			<gmd:MD_DataIdentification>
				<gmd:citation>
					<gmd:CI_Citation>
						<gmd:title>
							<gco:CharacterString>""" + _title + """</gco:CharacterString>
						</gmd:title>
						<gmd:date>
							<gmd:CI_Date>
								<gmd:date>
									<gco:DateTime>""" + _date + """</gco:DateTime>
								</gmd:date>
							</gmd:CI_Date>
						</gmd:date>
					</gmd:CI_Citation>
				</gmd:citation>
			</gmd:MD_DataIdentification>
		"""
        res += """
		<abstract>
			<gco:CharacterString>""" + self.descriptAbstract + """</gco:CharacterString>
		</abstract>
		<purpose>
			<gco:CharacterString>""" + self.descriptPurpose + """</gco:CharacterString>
		</purpose>
		<status>
			<MD_ProgressCode
				codeList="http://www.isotc211.org/2005/resources/codeList.xml#MD_ProgressCode"
				codeListValue=""" + '"' + self.statusProgress + '"' + """/>
		</status>
		"""
        #if self.metaContent or self.metaAll:
        #	STXTfile.write('CONTENT\n')
        #	for t in self.pdf.getTitles():
        #		STXTfile.write(t+'\n')
        if self.metaName or self.metaAll:
            res += "<gmd:pointOfContact>"
            for s in self.namesData:
                res += s.genISO115v2()
            res += "</gmd:pointOfContact>"
        if self.metaLocation or self.metaAll:
            res += """
		<gmd:extent>
			<gmd:EX_Extent>
				<gmd:description>
					<gco:CharacterString>Spatial extent for locations</gco:CharacterString>
				</gmd:description>
		"""
            for s in self.locationsData:
                res += s.genISO115v2()
            res += """
			</gmd:EX_Extent>
		</gmd:extent>
		"""
        if self.metaKeyWord or self.metaAll:

            kwTypes = {}
            locTypes = {}

            for s in self.keywordsData:
                if s.type in kwTypes:
                    if s.keyword != "":
                        kwTypes[s.type].append(s)
                else:
                    if s.keyword != "":
                        kwTypes[s.type] = []
                        kwTypes[s.type].append(s)

            for s in self.keywordsLocData:
                if s.type in locTypes:
                    if s.keyword != "":
                        locTypes[s.type].append(s)
                else:
                    if s.keyword != "":
                        locTypes[s.type] = []
                        locTypes[s.type].append(s)
            res += """
			<gmd:descriptiveKeywords>
			"""
            for key in kwTypes.keys():
                res += "<gmd:MD_Keywords>\n"
                #res += "<themekt>" + key + "</themekt>\n"
                for val in kwTypes[key]:
                    res += val.genISO115v2()
                res += """
				<type>
					<MD_KeywordTypeCode
						codeList="http://metadata.dgiwg.org/codelistRegistry?MD_KeywordTypeCode"
						codeListValue=""" + '"' + key + '"' + """/>
				</type>
				"""
                res += "</gmd:MD_Keywords>\n"

            for key in locTypes.keys():
                res += "<gmd:MD_Keywords>\n"
                #res += "<themekt>" + key + "</themekt>\n"
                for val in locTypes[key]:
                    res += val.genISO115v2()
                res += """
				<type>
					<MD_KeywordTypeCode
						codeList="http://metadata.dgiwg.org/codelistRegistry?MD_KeywordTypeCode"
						codeListValue=""" + '"' + key + '"' + """/>
				</type>
				"""
                res += "</gmd:MD_Keywords>\n"

            #for s in self.keywordsData:
            #	res += s.genISO115v2()
            #for s in self.keywordsLocData:
            #	res += s.genISO115v2()
            res += """
			</gmd:descriptiveKeywords>
			"""
        #if self.metaRef or self.metaAll:
        #	res += "<gmd:citation>\n"
        #	for s in self.refs:
        #		res += s.genISO115v2()
        #	res += "</gmd:citation>\n"

        res += "</gmd:identificationInfo>\n</gmd:MD_Metadata>\n"

        #STXTfile = codecs.open(self.OUTFilename, "w", self.config.fileCodec)
        #STXTfile.write(res)
        #STXTfile.close()
        return res
        #self.saveFile(self.OUTFilename, res)

    def saveFile(self, fname, text):
        STXTfile = codecs.open(fname, "w", self.config.fileCodec)
        STXTfile.write(text)
        STXTfile.close()

    def saveToTXT(self):
        print(self.OUTFilename + ' - OUT_FILE')
        res = ""
        if self.metaTitle or self.metaAll:
            res += 'TITLE\n'
            res += self.title + '\n'
        if self.metaContent or self.metaAll:
            res += 'CONTENT\n'
            if self.pdf != None:
                for t in self.pdf.getTitles():
                    res += t + '\n'
        if self.metaName or self.metaAll:
            res += 'NAMES\n'
            for s in self.namesData:
                res += s.genText() + '\n'
        if self.metaLocation or self.metaAll:
            res += 'LOCATIONS\n'
            for s in self.locationsData:
                res += s.genText() + '\n'
        if self.metaKeyWord or self.metaAll:
            res += 'KEY WORDS\n'
            i = 0
            for s in self.keywordsData:
                kp = s.genText()
                if i >= self.config.countKeyPhrases:
                    break
                if len(kp.split()) > self.config.maxKeyPhraseLength or len(
                        kp) < 4:
                    continue
                res += kp + '\n'
                i += 1
            for s in self.keywordsLocData:
                kp = s.genText()
                res += kp + '\n'
        if self.metaRef or self.metaAll:
            res += 'REFS\n'
            for s in self.refs:
                res += s.genText() + '\n'

        #self.saveFile(self.OUTFilename, res)
        return res

    def saveToFGDC(self):
        print(self.OUTFilename + ' - OUT_FILE')
        #STXTfile = codecs.open(self.OUTFilename, "w", self.config.fileCodec)

        year = datetime.date.today().year
        month = datetime.date.today().month
        day = datetime.date.today().day

        res = """
		<?xml version="1.0" encoding="UTF-8"?>
		<metadata xmlns:geonet="http://www.fao.org/geonetwork" xmlns:csw="http://www.opengis.net/cat/csw/2.0.2">
			<idinfo>\n"""

        #if self.metaRef or self.metaAll:
        #	res += "<citation>\n"
        #	for s in self.refs:
        #		res += s.genFGDC()
        #	res += "</citation>\n"
        res += "<citation>\n"
        _title = self.title.replace('\n', ' ')
        res += """
		<citeinfo>
			<origin>""" + self.origin + """</origin>
			<pubdate>""" + self.dateBegin + """</pubdate>
			<title>""" + _title + """</title>
			<onlink></onlink>
		</citeinfo>\n"""
        res += "</citation>\n"

        if self.metaTitle or self.metaAll:

            res += """
				<descript>
					<abstract>""" + self.descriptAbstract + """</abstract>
					<purpose>""" + self.descriptPurpose + """</purpose>
					<supplinf>""" + self.descriptSupplemental + """</supplinf>
				</descript>
		"""

        ## NEED UPDATE
        res += """
			<timeperd>
				<timeinfo>
					<rngdates>
						<begdate>""" + self.dateBegin + """</begdate>
						<enddate>""" + self.dateEnd + """</enddate>
					</rngdates>
				</timeinfo>
				<current>ground condition</current>
			</timeperd>
			<status>
				<progress>""" + self.statusProgress + """</progress>
				<update>""" + self.statusUpdate + """</update>
			</status>
			<accconst>""" + self.access + """</accconst>
			<useconst>
				Data not completely processed; some data experimental.
			</useconst>
		"""

        #if self.metaContent or self.metaAll:
        #	STXTfile.write('CONTENT\n')
        #	for t in self.pdf.getTitles():
        #		STXTfile.write(t+'\n')
        if self.metaLocation or self.metaAll:
            res += """
			<spdom>
			"""
            for s in self.locationsData:
                res += s.genFGDC()
            res += """
			</spdom>
			"""
        if self.metaKeyWord or self.metaAll:
            res += """
			<keywords>
			"""

            kwTypes = {}
            locTypes = {}

            for s in self.keywordsData:
                if s.type in kwTypes:
                    if s.keyword != "":
                        kwTypes[s.type].append(s)
                else:
                    if s.keyword != "":
                        kwTypes[s.type] = []
                        kwTypes[s.type].append(s)

            for s in self.keywordsLocData:
                if s.type in locTypes:
                    if s.keyword != "":
                        locTypes[s.type].append(s)
                else:
                    if s.keyword != "":
                        locTypes[s.type] = []
                        locTypes[s.type].append(s)

            for key in kwTypes.keys():
                res += "<theme>\n"
                res += "<themekt>" + key + "</themekt>\n"
                for val in kwTypes[key]:
                    res += val.genFGDC()
                res += "</theme>\n"

            for key in locTypes.keys():
                res += "<place>\n"
                res += "<placekt>" + key + "</placekt>\n"
                for val in locTypes[key]:
                    res += val.genFGDCloc()
                res += "</place>\n"
            res += """
			</keywords>
			"""
        res += """
		<ptcontac>
			<cntinfo>
				<cntperp>
					<cntper>""" + self.contact.name + """</cntper>
					<cntorg>""" + self.contact.organisation + """</cntorg>
				</cntperp>
				<cntaddr>
					<addrtype>mailing and physical</addrtype>
					<address>
						""" + self.contact.deliveryPoint + """
					</address>
					<city>""" + self.contact.city + """</city>
					<state>""" + self.contact.area + """</state>
					<postal>""" + self.contact.postalCode + """</postal>
					<country>""" + self.contact.country + """</country>
				</cntaddr>
				<cntvoice>""" + self.contact.phone + """</cntvoice>
				<cntfax>""" + self.contact.facs + """</cntfax>
				<cntemail>""" + self.contact.email + """</cntemail>
			</cntinfo>
		</ptcontac>
		"""
        res += """
			</idinfo>
		\n"""

        res += """
			<distinfo>
				<distrib>
					<cntinfo>
						<cntperp>
							<cntper>""" + self.contact.name + """</cntper>
							<cntorg>""" + self.contact.organisation + """</cntorg>
						</cntperp>
						<cntaddr>
							<addrtype>mailing and physical</addrtype>
							<address>
								""" + self.contact.deliveryPoint + """
							</address>
							<city>""" + self.contact.city + """</city>
							<state>""" + self.contact.area + """</state>
							<postal>""" + self.contact.postalCode + """</postal>
							<country>""" + self.contact.country + """</country>
						</cntaddr>
						<cntvoice>""" + self.contact.phone + """</cntvoice>
						<cntfax>""" + self.contact.facs + """</cntfax>
						<cntemail>""" + self.contact.email + """</cntemail>
					</cntinfo>
				</distrib>
				<distliab>[unknown]</distliab>
			</distinfo>
		"""

        res += "<metainfo>\n"

        year = datetime.date.today().year
        month = datetime.date.today().month
        day = datetime.date.today().day

        res += "<metd>" + str(year) + str(month) + str(day) + "</metd>\n"

        if self.metaName or self.metaAll:
            #res += "<ptcontac>"
            res += "<metc>"
            for s in self.namesData:
                res += s.genFGDC()
            res += "</metc>"
            #res += "</ptcontac>"
        res += """
			<metstdn>
				FGDC Content Standard for Digital Geospatial Metadata
			</metstdn>
			<metstdv>FGDC-STD-1998-1</metstdv>
		"""
        res += "</metainfo>\n"

        res += "</metadata>\n"

        #STXTfile.write(res)
        #STXTfile.close()
        #self.saveFile(self.OUTFilename, res)
        return res

    # <meta name="DC.Title" content="Page title">
    # <meta name="DC.Creator" content="Name of the site or of the page creator">
    # <meta name="DC.Subject" content="Topic of the resource content">
    # <meta name="DC.Description" content="Page description">
    # <meta name="DC.Publisher" content="Publisher">
    # <meta name="DC.Contributor" content="Contributor">
    # <meta name="DC.Date" content="Creation date of the material">
    # <meta name="DC.Type" content="Resource type">
    # <meta name="DC.Format" content="Resource format">
    # <meta name="DC.Identifier" content="URL of the current page (resource identifier)">
    # <meta name="DC.Source" content="Data source">
    # <meta name="DC.Language" content="Content language">
    # <meta name="DC.Coverage" content="Geographic targeting">
    # <meta name="DC.Rights" content="Copyright">
    def saveToDublin(self):
        print(self.OUTFilename + ' - OUT_FILE')
        STXTfile = codecs.open(self.OUTFilename, "w", self.config.fileCodec)

        res = ""
        if self.metaTitle or self.metaAll:
            _date = re.search(r'[0-9]{4}', self.title).group(0)
            if _date is None:
                _date = ""
            _title = self.title.replace('\n', ' ')
            res += "<meta name=\"DC.Title\" content=\"" + _title + "\">\n"
            res += "<meta name=\"DC.Date\" content=\"" + _date + "\">\n"
            res += "<meta name=\"DC.Language\" content=\"en-EN\">\n"
            #res += "<meta name=\"DC.Coverage\" content=\"Eastern Pacific\">"

        #<meta name="DC.Description" content="Описание страницы">
        #if self.metaContent or self.metaAll:
        #	STXTfile.write('CONTENT\n')
        #	for t in self.pdf.getTitles():
        #		STXTfile.write(t+'\n')
        if self.metaName or self.metaAll:
            res += "<meta name=\"DC.Creator\" content=\""
            for s in self.namesData:
                res += s.name + ","
            res = res[0:len(res) - 1]
            res += "\">\n"
        #if self.metaLocation or self.metaAll:
        #	for s in self.locationsData:
        #		res += s.genFGDC()

        if self.metaKeyWord or self.metaAll:
            res += "<meta name=\"DC.Subject\" content=\""
            i = 0
            for s in self.keywordsData:
                kp = s.genText()
                if i >= self.config.countKeyPhrases:
                    break
                if len(kp.split()) > self.config.maxKeyPhraseLength or len(
                        kp) < 4:
                    continue
                res += kp + ","
                i += 1
            res = res[0:len(res) - 1]
            res += "\">\n"

        STXTfile.write(res)
        STXTfile.close()