def __init__(
        self,
        path="",
        prefix="",
        dbstring="",
        logfile=None,
        options=None,
        sep=',',
        text_format='.txt',
        encoding='UTF8',
        copymode=True,
        doclean=True,
        subs=None
    ):
    """Store the import settings and create the shapefile and SQL loaders.

    Args:
        path: data location, stored as ``self.path``.
        prefix: file name prefix, stored as ``self.prefix``.
        dbstring: database connection string handed to both loaders.
        logfile: log file handed to both loaders (``None`` by default).
        options: option dictionary handed to ``ShpLoader``. When omitted,
            a fresh ``{'g': 'geom', 'D': True, 'I': True, 'S': True}`` is
            built for this instance.
        sep, text_format, encoding, copymode: stored on the instance; they
            are consumed outside this block.
        doclean: stored on the instance and handed to ``ShpLoader``.
        subs: substitution dictionary stored as ``self.substitutions``.
            When omitted, a fresh empty dictionary is used.
    """
    # Build the defaults per call: a dict literal in the signature would be
    # one object shared by every instance, so a change made through one
    # instance would leak into all the others.
    if options is None:
        options = {'g': 'geom', 'D': True, 'I': True, 'S': True}
    if subs is None:
        subs = {}
    self.path = path
    self.prefix = prefix
    self.sqlfile = ""
    self.copymode = copymode
    self.doclean = doclean
    self.encoding = encoding
    self.sep = sep
    self.text_format = text_format
    self.dbstring = dbstring
    self.logfile = logfile
    self.substitutions = subs
    self.found_import_dbfshpfiles = []
    self.found_import_csvtxtfiles = []
    self.sloader = ShpLoader(dbstring=self.dbstring, schema=TEMPSCHEMA,
                             logfile=self.logfile, options=options,
                             doclean=doclean)
    self.ploader = PsqlLoader(dbstring=self.dbstring, logfile=self.logfile)
Beispiel #2
0
 def __init__(self, source="", prefix="", dbstring="", logfile=None,
              options=None, doclean=True, subs=None):
     """Collect the shapefiles of every source and prepare the loader.

     Args:
         source: one data directory or a list of them, forwarded to the
             parent constructor.
         prefix: shapefile name prefix; when empty it is deduced from each
             source directory by ``get_prefix``.
         dbstring: database connection string.
         logfile: log file forwarded to the parent and to ``ShpLoader``.
         options: option dictionary handed to ``ShpLoader``. When omitted,
             a fresh ``{'g': 'geom', 'D': True, 'I': True, 'S': True}`` is
             built for this instance.
         doclean: forwarded to the parent and to ``ShpLoader``.
         subs: substitution dictionary forwarded to the parent. When
             omitted, a fresh empty dictionary is used.
     """
     # Build the defaults per call so instances never share one dict.
     if options is None:
         options = {'g': 'geom', 'D': True, 'I': True, 'S': True}
     if subs is None:
         subs = {}
     super(ShpImporter, self).__init__(source, dbstring, logfile, doclean, subs)
     self.shapefiles = []
     if isinstance(self.source, list):
         for src in self.source:
             print("Importing source {}".format(src))
             self.prefix = self.get_prefix(src, prefix)
             self.get_shapefiles(src)
     else:
         # Use self.source for both calls so that the prefix and the file
         # lookup always refer to the same directory.
         self.prefix = self.get_prefix(self.source, prefix)
         self.get_shapefiles(self.source)
     self.sloader = ShpLoader(dbstring=dbstring, schema=IMPORTSCHEMA,
                              logfile=self.logfile, options=options,
                              doclean=doclean)
Beispiel #3
0
 def __init__(self, source="", prefix="", dbstring="", logfile=None,
              options=None, doclean=True):
     """Resolve the prefix, collect the shapefiles and prepare the loader.

     Args:
         source: data source forwarded to the parent constructor.
         prefix: shapefile name prefix passed to ``get_prefix``.
         dbstring: database connection string.
         logfile: log file forwarded to the parent and to ``ShpLoader``.
         options: option dictionary handed to ``ShpLoader``. When omitted,
             a fresh ``{'g': 'geom', 'D': True, 'I': True, 'S': True}`` is
             built for this instance.
         doclean: forwarded to the parent and to ``ShpLoader``.
     """
     # Build the default per call so instances never share one dict.
     if options is None:
         options = {'g': 'geom', 'D': True, 'I': True, 'S': True}
     super(ShpImporter, self).__init__(source, dbstring, logfile, doclean)
     self.shapefiles = []
     self.prefix = self.get_prefix(prefix)
     self.get_shapefiles()
     self.sloader = ShpLoader(dbstring=dbstring, schema=IMPORTSCHEMA,
                              logfile=self.logfile, options=options,
                              doclean=doclean)
Beispiel #4
0
 def __init__(self,
              source="",
              prefix="",
              dbstring="",
              logfile=None,
              options=None,
              doclean=True):
     """Set up the importer: prefix, shapefile list and shapefile loader.

     Args:
         source: data source forwarded to the parent constructor.
         prefix: shapefile name prefix passed to ``get_prefix``.
         dbstring: database connection string.
         logfile: log file forwarded to the parent and to ``ShpLoader``.
         options: option dictionary handed to ``ShpLoader``; defaults to a
             new ``{'g': 'geom', 'D': True, 'I': True, 'S': True}`` created
             for each instance.
         doclean: forwarded to the parent and to ``ShpLoader``.
     """
     if options is None:
         # A dict literal in the signature would be a single object shared
         # by all instances; create it here instead.
         options = {
             'g': 'geom',
             'D': True,
             'I': True,
             'S': True
         }
     super(ShpImporter, self).__init__(source, dbstring, logfile, doclean)
     self.shapefiles = []
     self.prefix = self.get_prefix(prefix)
     self.get_shapefiles()
     self.sloader = ShpLoader(dbstring=dbstring,
                              schema=IMPORTSCHEMA,
                              logfile=self.logfile,
                              options=options,
                              doclean=doclean)
Beispiel #5
0
class ShpImporter(DataImporter):
    """Load shapefile data into a PostGIS database.

    Subclasses list the tables they need in SHAPEFILES (required) and
    OPT_SHAPEFILES (optional). For each table, the importer looks in the
    source directory for ``<prefix><table>.shp`` or, failing that,
    ``<prefix><table>.dbf`` (case-insensitively) and loads what it finds
    through a ``ShpLoader``.
    """
    # Shapefile names to load, without the extension and prefix. It will be the table name.
    SHAPEFILES = []
    # Optional shapefiles
    OPT_SHAPEFILES = []
    # SQL files to execute before loading shapefiles
    PRELOADSQL = []
    # SQL files to execute after loading shapefiles
    POSTLOADSQL = []

    def __init__(self, source="", prefix="", dbstring="", logfile=None,
                 options=None, doclean=True, subs=None):
        """Collect the shapefiles of every source and prepare the loader.

        Args:
            source: one data directory or a list of them, forwarded to the
                parent constructor.
            prefix: shapefile name prefix; when empty it is deduced from
                each source directory by ``get_prefix``.
            dbstring: database connection string.
            logfile: log file forwarded to the parent and to ``ShpLoader``.
            options: option dictionary handed to ``ShpLoader``. When
                omitted, a fresh
                ``{'g': 'geom', 'D': True, 'I': True, 'S': True}`` is built
                for this instance.
            doclean: forwarded to the parent and to ``ShpLoader``.
            subs: substitution dictionary forwarded to the parent. When
                omitted, a fresh empty dictionary is used.
        """
        # Build the defaults per call: load_data() writes into
        # self.sloader.options, so a dict shared through the signature
        # could carry state from one importer to the next.
        if options is None:
            options = {'g': 'geom', 'D': True, 'I': True, 'S': True}
        if subs is None:
            subs = {}
        super(ShpImporter, self).__init__(source, dbstring, logfile, doclean, subs)
        # List of (table name, file path) pairs filled by get_shapefiles().
        self.shapefiles = []
        if isinstance(self.source, list):
            for src in self.source:
                print("Importing source {}".format(src))
                self.prefix = self.get_prefix(src, prefix)
                self.get_shapefiles(src)
        else:
            # Use self.source for both calls so that the prefix and the
            # file lookup always refer to the same directory.
            self.prefix = self.get_prefix(self.source, prefix)
            self.get_shapefiles(self.source)
        self.sloader = ShpLoader(dbstring=dbstring, schema=IMPORTSCHEMA,
                                 logfile=self.logfile, options=options,
                                 doclean=doclean)

    def check_input(self):
        """Check that a file was found for every table in SHAPEFILES.

        Raises:
            StandardError: if at least one required table has no file.
        """
        found_tables = set(table for table, _ in self.shapefiles)
        if not set(self.SHAPEFILES).issubset(found_tables):
            raise StandardError("Some input files missing. Check data source.")

    def load_data(self):
        """Load all collected shapefiles into the database.

        The first file of a table is loaded with mode 'c' and any further
        file of the same table with mode 'a'. Loading stops at the first
        failure.

        Returns:
            The result of the last ``ShpLoader.load()`` call, or True when
            there was nothing to load.
        """
        ret = True
        created_tables = set()
        for shp, rshp in self.shapefiles:
            self.sloader.set_shapefile(rshp)
            # the table name is the shapefile name without extension
            self.sloader.set_table(shp)
            if shp in created_tables:
                self.sloader.options['mode'] = 'a'
            else:
                self.sloader.options['mode'] = 'c'
                created_tables.add(shp)
            ret = self.sloader.load()
            # if one shapefile failed, stop there
            if not ret:
                break
        return ret

    def set_dbparams(self, dbstring=""):
        """Forward the connection string to the parent and to the loader."""
        super(ShpImporter, self).set_dbparams(dbstring)
        self.sloader.set_dbparams(dbstring)

    def get_prefix(self, source, prefix=""):
        """Return the shapefile prefix, deducing it from *source* if needed.

        A non-empty *prefix* is returned unchanged. Otherwise the .dbf and
        .shp files of the *source* directory whose base name ends with one
        of the SHAPEFILES names are inspected, and the leading part of the
        name is kept as a candidate when it contains an underscore.

        Returns:
            The given prefix, the single candidate found, or '' when
            *source* is empty or not a directory, or when zero or several
            candidates were found.
        """
        if prefix:
            return prefix
        if not source:
            return ''
        if not os.path.isdir(source):
            print("{} n'est pas un dir".format(source))
            return ''

        prefixes = []
        for filename in os.listdir(source):
            basename, ext = os.path.splitext(os.path.basename(filename))
            # only check dbf and shp
            if ext.lower() not in ['.dbf', '.shp']:
                continue
            for shp in self.SHAPEFILES:
                # the table name must sit at the end of the file name
                if basename[-len(shp):] != shp:
                    continue
                curprefix = basename[:-len(shp)]
                # only consider prefixes with "_"
                if '_' in curprefix and curprefix not in prefixes:
                    prefixes.append(curprefix)

        # if only one prefix found, use it !
        if len(prefixes) == 1:
            return prefixes[0]
        if len(prefixes) > 1:
            sys.stderr.write("Cannot determine prefix, multiple found : %s \n" % ",".join(prefixes))
        return ''

    def _find_shapefile(self, shp, base_dir, entries, lowered):
        """Return the path of the .shp (preferred) or .dbf file of table *shp*, or None."""
        # Both sides of the comparison are lower-cased, so the match is
        # case-insensitive even when the prefix or table name has capitals.
        stem = (self.prefix + shp).lower()
        for ext in (".shp", ".dbf"):
            wanted = stem + ext
            if wanted in lowered:
                return os.path.join(base_dir, entries[lowered.index(wanted)])
        return None

    def get_shapefiles(self, source):
        """Find the files of all optional and required tables in *source*.

        Every match is appended to ``self.shapefiles`` as a
        ``(table name, file path)`` pair, optional tables first. A warning
        is written to stderr for each required table without a file.

        Returns:
            The list of expected .dbf file names of the required tables
            that were not found.
        """
        notfound = []

        baseDir = os.path.realpath(source)
        ls = os.listdir(baseDir)
        lsLower = [x.lower() for x in ls]

        for shp in self.OPT_SHAPEFILES:
            found = self._find_shapefile(shp, baseDir, ls, lsLower)
            if found is not None:
                self.shapefiles.append((shp, found))

        for shp in self.SHAPEFILES:
            found = self._find_shapefile(shp, baseDir, ls, lsLower)
            if found is not None:
                self.shapefiles.append((shp, found))
            else:
                filenameDbf = self.prefix + shp + ".dbf"
                notfound.append(filenameDbf)
                sys.stderr.write("Warning : file for table %s not found.\n"
                                 "%s not found\n" % (shp, filenameDbf))
        return notfound
class DataDirManager(object):
    """Import data files found in one or several directories and run SQL around them.

    Subclasses configure the work through the class-level lists below.
    """
    # SQL files to execute before importing/exporting files
    # (looked up in the "sql" directory next to this module)
    PRE_SQL = []
    # Shapefiles to import, as (table name, mandatory) pairs. The table name is
    # the file name without the extension and prefix.
    IMPORT_DBFSHPFILES = []
    # CSV/TXT files to import, as (table name, mandatory) pairs
    IMPORT_CSVTXTFILES = []
    # Names of the shapefiles to export, without extension; each one is written
    # to "<path>/<name>.shp" from the table "tempus.road_<name>"
    EXPORT_DBFSHPFILES = []
    # CSV/TXT files to export
    EXPORT_CSVTXTFILES = []
    # SQL files to execute after importing/exporting files
    # (looked up in the "sql" directory next to this module)
    POST_SQL = []
    
    def __init__(
            self,
            path="",
            prefix="",
            dbstring="",
            logfile=None,
            options=None,
            sep=',',
            text_format='.txt',
            encoding='UTF8',
            copymode=True,
            doclean=True,
            subs=None
        ):
        """Store the import settings and create the shapefile and SQL loaders.

        Args:
            path: data directory, or list of data directories, to scan.
            prefix: file name prefix; when empty, ``check_input`` tries to
                deduce it from the directory content.
            dbstring: database connection string handed to both loaders.
            logfile: log file handed to both loaders (``None`` by default).
            options: option dictionary handed to ``ShpLoader``. When
                omitted, a fresh
                ``{'g': 'geom', 'D': True, 'I': True, 'S': True}`` is built
                for this instance.
            sep, text_format, encoding, copymode: stored on the instance;
                they are consumed outside this block.
            doclean: when true, the generated SQL file is removed after
                each CSV import; also handed to ``ShpLoader``.
            subs: substitution dictionary applied to SQL templates, stored
                as ``self.substitutions``. When omitted, a fresh empty
                dictionary is used.
        """
        # Build the defaults per call: import_dbfshpfiles() writes into
        # self.sloader.options, so a dict shared through the signature
        # could carry state from one instance to the next.
        if options is None:
            options = {'g': 'geom', 'D': True, 'I': True, 'S': True}
        if subs is None:
            subs = {}
        self.path = path
        self.prefix = prefix
        self.sqlfile = ""
        self.copymode = copymode
        self.doclean = doclean
        self.encoding = encoding
        self.sep = sep
        self.text_format = text_format
        self.dbstring = dbstring
        self.logfile = logfile
        self.substitutions = subs
        # (table name, file path) pairs filled by check_input()
        self.found_import_dbfshpfiles = []
        self.found_import_csvtxtfiles = []
        self.sloader = ShpLoader(dbstring=self.dbstring, schema=TEMPSCHEMA,
                                 logfile=self.logfile, options=options,
                                 doclean=doclean)
        self.ploader = PsqlLoader(dbstring=self.dbstring, logfile=self.logfile)
    
    def clean(self):
        """Remove previously generated SQL file."""
        if os.path.isfile(self.sqlfile):
            os.remove(self.sqlfile)
    
    def check_input(self):
        """ Update self.found_import_dbfshpfiles and self.found_import_csvtxtfiles"""
        if (self.IMPORT_CSVTXTFILES != [] or self.IMPORT_DBFSHPFILES != []):
            if isinstance(self.path, list):
                for path in self.path:
                    print "Path {}".format(path)
                    self.prefix = self.get_prefix(path, self.prefix)
                    self.get_dbfshpfiles(path)
                    self.get_csvtxtfiles(path)
            else:
                print "Path {}".format(self.path)
                self.prefix = self.get_prefix(self.path, self.prefix)
                self.get_dbfshpfiles(self.path)
                self.get_csvtxtfiles(self.path)
                pass
            """Check if we have the required files."""
            filelist = set([s for s,_ in self.found_import_csvtxtfiles])
            for f, mandatory in self.IMPORT_CSVTXTFILES:
                if mandatory and "%s.txt" % f not in filelist and "%s.csv" % f not in filelist:
                    raise StandardError("Missing mandatory file: %s.txt or %s.csv" % f)
            filelist = set([s for s,_ in self.found_import_dbfshpfiles])
            for f, mandatory in self.IMPORT_DBFSHPFILES:
                if mandatory and "%s.shp" % f not in filelist and "%s.dbf" % f not in filelist:
                    raise StandardError("Missing mandatory file: %s.shp or %s.dbf" % f) 
    
    def get_prefix(self, path, prefix = ""):
        """Get prefix for shapefiles. If given prefix is empty, try to find it browsing the directory."""
        myprefix = ""
        if (self.IMPORT_CSVTXTFILES != [] or self.IMPORT_DBFSHPFILES != []):
            if prefix:
                myprefix = prefix
            else:
                # prefix has not been given, try to deduce it from files
                prefixes = []
                if os.path.isdir(path):
                    for filename in os.listdir(os.path.realpath(path)):
                        for tablename, mandatory in self.IMPORT_DBFSHPFILES:
                            # if we find the table name at the end of the file name (w/o ext), add prefix to the list
                            # only check dbf and shp
                            basename, ext = os.path.splitext(os.path.basename(filename))
                            if ext.lower() in ['.dbf', '.shp'] and basename[-len(tablename):] == tablename:
                                curprefix = basename[:-len(tablename)]
                                # only consider prefixes with "_"
                                if '_' in curprefix and curprefix not in prefixes:
                                    prefixes.append(curprefix)
                        for tablename, mandatory in self.IMPORT_CSVTXTFILES:
                            # if we find the table name at the end of the file name (w/o ext), add prefix to the list
                            # only check csv and txt
                            basename, ext = os.path.splitext(os.path.basename(filename))
                            if ext.lower() in ['.csv', '.txt'] and basename[-len(tablename):] == tablename:
                                curprefix = basename[:-len(tablename)]
                                # only consider prefixes with "_"
                                if '_' in curprefix and curprefix not in prefixes:
                                    prefixes.append(curprefix)                
                    # if only one prefix found, use it !
                    if len(prefixes) > 1:
                        sys.stderr.write("Cannot determine prefix, multiple found : %s \n" % ",".join(prefixes))
                    elif len(prefixes) == 1:
                        return prefixes[0]
                    else:
                        return ''
                else:
                    raise StandardError("%s is not a directory" % path)
        return myprefix
    
    def get_dbfshpfiles(self, path):
        notfound = []
        if (self.IMPORT_CSVTXTFILES != [] or self.IMPORT_DBFSHPFILES != []):
            ls = os.listdir(os.path.realpath(path))
            for tablename, mandatory in self.IMPORT_DBFSHPFILES:
                filenameShp = (self.prefix + tablename + ".shp").lower()
                filenameDbf = (self.prefix + tablename + ".dbf").lower()
                lsLower = [x.lower() for x in ls]
                if filenameShp in lsLower:
                    i = lsLower.index(filenameShp)
                    self.found_import_dbfshpfiles.append((tablename, os.path.join(os.path.realpath(path), ls[i])))
                elif filenameDbf in lsLower:
                    i = lsLower.index(filenameDbf)
                    self.found_import_dbfshpfiles.append((tablename, os.path.join(os.path.realpath(path), ls[i])))
                elif mandatory == True:
                    notfound.append(tablename)
                    sys.stderr.write("Warning: file for table %s not found.\n"\
                                         "%s and %s not found\n" % (tablename, filenameShp, filenameDbf))
        return notfound
    
    def get_csvtxtfiles(self, path):
        notfound = []
        if (self.IMPORT_CSVTXTFILES != [] or self.IMPORT_DBFSHPFILES != []):
            ls = os.listdir(os.path.realpath(path))
            for tablename, mandatory in self.IMPORT_CSVTXTFILES:
                filenameCsv = (self.prefix + tablename + ".csv").lower()
                filenameTxt = (self.prefix + tablename + ".txt").lower()
                lsLower = [x.lower() for x in ls]
                if filenameCsv in lsLower:
                    i = lsLower.index(filenameCsv)
                    self.found_import_csvtxtfiles.append((tablename, os.path.join(os.path.realpath(path), ls[i])))
                elif filenameTxt in lsLower:
                    i = lsLower.index(filenameTxt)
                    self.found_import_csvtxtfiles.append((tablename, os.path.join(os.path.realpath(path), ls[i])))
                elif mandatory == True:
                    notfound.append(tablename)
                    sys.stderr.write("Warning: file for table %s not found.\n"\
                                         "%s and %s not found\n" % (tablename, filenameCsv, filenameTxt))
        return notfound
    
    def run(self):
        ret = True
        try:
            self.check_input()
        except StandardError as e:
            sys.stderr.write("During import: %s\n" % e.message)
            return False
        
        ret = self.execute_sqlfiles(self.PRE_SQL)
        if ret:
            print "pre_sql() done.\n"
            ret = self.import_dbfshpfiles()
        else:
            sys.stderr.write("Error during pre_sql().\n")
        if ret:
            print "import_dbfshpfiles() done.\n"
            ret = self.import_csvtxtfiles()
        else:
            sys.stderr.write("Error during import_dbfshpfiles().\n")
        if ret:
            print "import_csvtxtfiles() done.\n"
            ret = self.export_dbfshpfiles()
        else:
            sys.stderr.write("Error during import_csvtxtfiles().\n")
        if ret:
            print "export_dbfshpfiles() done.\n"
            ret = self.export_csvtxtfiles()
        else:
            sys.stderr.write("Error during export_dbfshpfiles().\n")
        if ret:
            print "export_csvtxtfiles() done.\n"
            ret = self.execute_sqlfiles(self.POST_SQL)
        else:
            sys.stderr.write("Error during export_csvtxtfiles().\n")
        if ret:
            print "post_sql() done.\n"
            self.clean()
        else:
            sys.stderr.write("Error during post_sql().\n")
        return ret
    
    def execute_sqlfiles(self, files, substitute = True): 
        """Load some SQL files to the defined database.
        Stop if one was wrong."""
        ret = True
        is_template = substitute and len(self.substitutions) > 0
        for sqlfile in files:
            filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'sql', sqlfile)
            # Stop if one SQL execution was wrong
            if ret and os.path.isfile(filename):
                if is_template:
                    f = open(filename, 'r')
                    template = f.read()
                    self.ploader.set_from_template(template, self.substitutions)
                else:
                    self.ploader.set_sqlfile(filename)
                ret = self.ploader.load()
        return ret
    
    def import_dbfshpfiles(self):
        """Load all given shapefiles into the database."""
        ret = True
        created_tables = set()
        for line_number, file in enumerate(self.found_import_dbfshpfiles):
            tablename, filepath = file
            # if one shapefile failed, stop there
            if ret:
                self.sloader.set_shapefile(filepath)
                # the table name is the shapefile name without extension
                self.sloader.set_table(tablename)
                if tablename in created_tables:
                    self.sloader.options['mode'] = 'a'
                else:
                    self.sloader.options['mode'] = 'c'                    
                    created_tables.add(tablename)
                ret = self.sloader.load()
        return ret
        
         
    def import_csvtxtfiles(self):
        ret=True
        for line_number, file in enumerate(self.found_import_csvtxtfiles):
            tablename, filepath = file
            # If one csvfile failed, stop here
            if ret:
                self.sqlfile = self.generate_sql_from_csv(tablename, filepath)
                ret = self.execute_sqlfiles([self.sqlfile], substitute = False)
                if self.doclean:
                    self.clean() 
        return ret
    
    
    def export_dbfshpfiles(self):
        """Export the tables listed in EXPORT_DBFSHPFILES to shapefiles.

        For every name ``f``, the PGSQL2SHP tool is called to write
        ``<self.path>/<f>.shp`` from the table ``tempus.road_<f>``, using
        the connection settings of ``self.dbparams``. The tool's stderr
        goes to ``self.logfile`` when one is set, to stderr otherwise.

        Returns:
            True when every call exited with code 0 (or there was nothing
            to export), False otherwise.
        """
        ret = True
        for f in self.EXPORT_DBFSHPFILES:
            filename = self.path+"/"+f+'.shp'
            command = [ PGSQL2SHP, "-f", filename, "-h", self.dbparams['host'], "-u", self.dbparams['user'], "-p", self.dbparams['port'] ]
            if 'password' in self.dbparams:
                # The flag and its value are two separate arguments; as a
                # single "-P <password>" element the tool would receive
                # the password with a leading space.
                command.extend(["-P", self.dbparams['password']])
            command.append(self.dbparams['dbname'])
            command.append("tempus.road_%s" % f)

            if self.logfile:
                outerr = open(self.logfile, "a")
            else:
                outerr = sys.stderr

            try:
                outerr.write("\n======= PGSQL2SHP %s\n" % f)
                # Flush so that the header lands before the output the
                # child process writes to the same stream.
                outerr.flush()
                rescode = -1
                try:
                    rescode = subprocess.call(command, stderr = outerr)
                except OSError as e:
                    sys.stderr.write("Error calling %s (%s) : %s \n" % (" ".join(command), e.errno, e.strerror))
                if rescode != 0:
                    ret = False
            finally:
                # Only close what was opened here, never sys.stderr.
                if self.logfile:
                    outerr.close()
        return ret