def read_csv(files):
    """
    Read a group of properly formatted CSV files and build the program data structures.

    :param files: dict {file#: ("root", "map_path", "hash_path")}
    :return: (mappings dict, file_hash dict)
        mappings dict: {(file#, line#): ("old_path", "int_path", "ext_path", ...)}
        file_hash dict: {(file#, "path"): hash}
        Either dict is returned as None when it ends up empty.
    """
    mappings = {}
    file_hash = {}
    for file_num, (_root, map_path, hash_path) in files.items():
        if map_path is not None:
            with csv23.open(map_path, "r") as csv_file:
                reader = csv.reader(csv_file)
                next(reader)  # ignore the header
                # keys use the physical line number in the file; the header is line 1,
                # so the first data row is line 2
                for line_num, record in enumerate(reader, start=2):
                    mappings[(file_num, line_num)] = tuple(csv23.fix(record))
        if hash_path is not None:
            with csv23.open(hash_path, "r") as csv_file:
                reader = csv.reader(csv_file)
                next(reader)  # ignore the header
                for record in reader:
                    record = csv23.fix(record)
                    path = os.path.join(record[0], record[1])
                    file_hash[(file_num, path)] = record[2]
    # callers expect None (not an empty dict) when nothing was read
    return mappings or None, file_hash or None
def unzip():
    """Extract each zip archive listed in the config CSV into its image folder."""
    with csv23.open(Config.csv_path, "r") as handle:
        handle.readline()  # remove header
        for record in csv.reader(handle):
            record = csv23.fix(record)
            source = record[13]
            if not source:
                continue
            # the zip was previously copied from the remote share to a local folder
            local = source.replace(Config.remote, Config.local_zip)
            # destination is local_img/park/plot/date[suffix]
            dest = os.path.join(Config.local_img, record[0], record[1], record[5])
            suffix = record[6]
            if suffix:
                dest += suffix
            print("unzip", local, dest)
            try:
                os.makedirs(dest)
            except OSError:
                # destination may already exist (or be un-creatable); skip this archive
                print("Failed to create dir", dest)
                continue
            with zipfile.ZipFile(local, "r") as zip_ref:
                zip_ref.extractall(dest)
def readfile(filename):
    """Read the input CSV data in filename (formatted as described above)."""
    data = {}
    with csv23.open(filename, "r") as in_file:
        reader = csv.reader(in_file)
        next(reader)  # ignore the header
        for record in reader:
            record = csv23.fix(record)
            site, team, quad = record[0], record[2], record[3]
            name = "{0}|{1}{2}".format(site, team, quad)
            # the corner number is the last character of column 4
            corner = int(record[4][-1:])
            if corner not in (1, 2):
                print(site, team, name, name, corner)
                continue
            # pylint: disable=invalid-name
            # I think x,y,z, while too short, are the best variables names here.
            x = float(record[5])
            y = float(record[6])
            z = float(record[7])
            data.setdefault(name, {})[corner] = (x, y, z)
    return data
def check_paths(csv_path):
    """Check that the paths in csv_path exist in the filesystem."""
    missing_paths = []
    with csv23.open(csv_path, "r") as csv_file:
        reader = csv.reader(csv_file)
        for line, record in enumerate(reader, start=1):
            if line == 1:
                # skipping the header
                continue
            record = csv23.fix(record)
            name = record[Config.name_index]
            folder = record[Config.folder_index]
            if not name or not folder:
                print("Bad record at line {0}".format(line))
                continue
            if Config.path_root is None:
                path = os.path.join(folder, name)
            else:
                path = os.path.join(Config.path_root, folder, name)
            if not os.path.exists(path):
                missing_paths.append((path, line))
    # report in sorted path order, not file order
    for path, line in sorted(missing_paths):
        print("Path `{0}` not found at line {1}".format(path, line))
def check_unusual(data):
    """Report rows whose destination differs from the source by more than a known substitution."""
    substitutions = [
        ("ALBERS", "AKR"),
        ("STATEWID", "STATEWIDE"),
        ("SUBSIST", "SUBSISTENCE"),
    ]

    def known_substitution(old, dest):
        # True when a single documented text substitution maps old onto dest
        return any(old.replace(find, put) == dest for (find, put) in substitutions)

    for record in data:
        record = csv23.fix(record)
        old = record[0].upper()
        new = record[1].upper()
        ext = record[2].upper()
        if new and ext:
            # a row may have at most one destination
            print("ERROR: multiple destinations old:{0}, new:{1}, ext:{2}".
                  format(old, new, ext))
            continue
        if new and old != new and not known_substitution(old, new):
            print("Moving old:{0} to new:{1}".format(old, new))
        if ext and old != ext and not known_substitution(old, ext):
            print("Moving old:{0} to ext:{1}".format(old, ext))
def read_csv(csv_path):
    """Read the list of photos from csv_path."""
    with csv23.open(csv_path, "r") as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # skip the header
        return [csv23.fix(record) for record in reader]
def read_csv_map(csv_path):
    """
    Return a dict mapping each old path (column 0) to its destination path.

    The destination is the "new" path (column 1) when it is non-empty,
    otherwise the "external" path (column 2).

    :param csv_path: path to a CSV file with a header row.
    :return: dict {old_path: destination_path}
    """
    mappings = {}
    with csv23.open(csv_path, "r") as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader)  # ignore the header
        for row in csv_reader:
            row = csv23.fix(row)
            old_path = row[0]
            # BUG FIX: csv.reader yields "" (never None) for an empty field, so the
            # original test `row[2] if row[1] is None else row[1]` always picked
            # row[1] and the fallback to row[2] never fired.  Test truthiness so a
            # blank "new" column falls back to the "external" column.
            new_path = row[1] if row[1] else row[2]
            mappings[old_path] = new_path
    return mappings
def load_csv_file(csv_path):
    """Return a list of the rows in the CSV."""
    with csv23.open(csv_path, "r") as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # ignore the header
        return [csv23.fix(record) for record in reader]
def read_csv_map(csv_path):
    """Return {new_fgdb/mosaic: [(old_path, new_path), ...]} read from csv_path."""
    mappings = {}
    with csv23.open(csv_path, "r") as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # ignore the header
        for record in reader:
            record = csv23.fix(record)
            # columns: old_fgdb, mosaic, old_path, new_fgdb, new_path
            _old_fgdb, mosaic, old_path, new_fgdb, new_path = record[:5]
            key = os.path.join(new_fgdb, mosaic)
            mappings.setdefault(key, []).append((old_path, new_path))
    return mappings
def files_in_csv(csv_path):
    """Return a set of standardized relative paths to photos in the file at csv_path."""
    paths = set()
    with csv23.open(csv_path, "r") as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # skip the header
        for record in reader:
            record = csv23.fix(record)
            path = record[Config.name_index]
            # prepend the optional folder, then the optional unit, when configured
            for index in (Config.folder_index, Config.unit_index):
                if index is not None:
                    path = os.path.join(record[index], path)
            paths.add(standardize(path))
    return paths
def main():
    """Copy each zip archive listed in the config CSV from the remote share to local storage."""
    with csv23.open(Config.csv_path, "r") as handle:
        reader = csv.reader(handle)
        next(reader)  # remove header
        for record in reader:
            record = csv23.fix(record)
            source = record[13]
            if not source:
                continue
            local = source.replace(Config.remote, Config.local_zip)
            if not os.path.exists(source):
                print("File not found", source)
                continue
            print("copy {0}".format(os.path.basename(local)))
            try:
                shutil.copy(source, local)
            except IOError:
                # destination folder is likely missing; create it and retry once
                os.makedirs(os.path.dirname(local))
                shutil.copy(source, local)
def check_unique_sources(data):
    """
    All the text in the first column (source folders) must have a unique prefix.
    i.e. images/dena is invalid if images/dena/ikonos is defined

    :param data: an iterable of lists
    :return: invalid source locations (printed to the console)
    """
    # normalized sources in sorted order, so any prefix immediately precedes
    # the strings it prefixes
    sources = sorted(csv23.fix(record)[0].upper() for record in data)
    # a source is invalid when its sorted neighbor starts with it
    invalid = [
        current
        for current, following in zip(sources, sources[1:])
        if following.startswith(current)
    ]
    if invalid:
        print("{0} source prefixes are not unique".format(len(invalid)))
        for problem in invalid:
            print(problem)
import csv import os import csv23 tm_filesystem = r"X:\GIS\ThemeMgr" tm_database = r"data\TM_20171206.csv" unique_themes = set([]) with csv23.open(tm_database, "r") as csv_file: csv_reader = csv.reader(csv_file) next(csv_reader) # ignore the header for row in csv_reader: row = csv23.fix(row) theme = row[3] unique_themes.add(theme) print("Missing Themes:") for theme in sorted(unique_themes): if theme and not os.path.exists(theme): print(" " + theme) print("Extra Themes:") for root, dirs, files in os.walk(tm_filesystem): if ".git" in dirs: dirs.remove(".git") for name in files: base, ext = os.path.splitext(name) if ext.lower() != ".xml":
def organize():
    """Add additional attributes to the ifsar file list."""
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    # Reads the raw file inventory at Config.csv_path (folder, name, ext, size)
    # and writes an expanded inventory to Config.out_path with classification
    # columns derived from the path/name plus flags for sidecar files on disk.
    with csv23.open(Config.out_path, "w") as out_file:
        csv_writer = csv.writer(out_file)
        header = [
            "folder",
            "filename",
            "ext",
            "size",
            "legacy",
            "nga",
            "kind",
            "edge",
            "cell",
            "lat",
            "lon",
            "tfw",
            "xml",
            "html",
            "txt",
            "tif_xml",
            "ovr",
            "aux",
            "rrd",
            "aux_old",
            "crc",
            "extras",
            "skip",
        ]
        csv23.write(csv_writer, header)
        with csv23.open(Config.csv_path, "r") as csv_file:
            csv_reader = csv.reader(csv_file)
            next(csv_reader)  # ignore the header
            for row in csv_reader:
                row = csv23.fix(row)
                path_in, name, ext, size = row
                # classify with case-insensitive matching
                name = name.lower()
                path = path_in.lower()
                legacy = "N"
                if "legacy" in path:
                    legacy = "Y"
                nga = "N"
                if "nga_30" in path:
                    nga = "Y"
                # kind: later tests win — dtm beats dsm beats ori/ori_sup
                kind = ""
                if "ori" in name or ("ori" in path and "priority" not in path):
                    kind = "ori"
                if kind == "ori" and "_sup" in name:
                    kind = "ori_sup"
                if "dsm" in name or "dsm" in path:
                    kind = "dsm"
                if "dtm" in name or "dtm" in path:
                    kind = "dtm"
                edge = "N"
                if "edge" in path:
                    edge = "Y"
                cell = ""
                lat, lon = 0, 0
                # numeric cell folders look like ...\cell_123\...
                match = re.search(r"\\cell_(\d*)\\", path)
                if match:
                    cell = match.group(1)
                # lettered cells map to numbers: d -> 196, e -> 197, f -> 198
                # NOTE(review): this branch yields an int while the numeric
                # branch yields a string — confirm downstream handles both
                match = re.search(r"\\cell_([def])\\", path)
                if match:
                    cell = ord(match.group(1)) - ord("d") + 196
                # lat/lon are encoded in the name as _n####w#### (hundredths)
                # NOTE(review): `/ 100` truncates on Python 2 but is float
                # division on Python 3 (csv23 implies dual support) — confirm
                # which is intended
                match = re.search(r"_n(\d*)w(\d*)", name)
                if match:
                    lat = int(match.group(1)) / 100
                    lon = int(match.group(2)) / 100
                #
                # Check for supplemental *.html, *.aux.xml, etc files
                #
                file_path = os.path.join(path_in, name)
                # suffixes of every sibling file sharing this basename,
                # e.g. ".tfw", ".tif.aux.xml" (includes the ".tif" itself)
                exts_found = [
                    sup.replace(file_path, "").lower()
                    for sup in glob.glob(file_path + ".*")
                ]
                # exts_possible = ['.tif', '.tfw','.xml','.html','.txt','.tif.xml',
                #                  '.tif.ovr','.tif.aux.xml', '.rrd', '.aux', '.tif.crc']
                tfw, xml, html, txt, tif_xml = 0, 0, 0, 0, 0
                ovr, aux, rrd, aux_old, crc = 0, 0, 0, 0, 0
                if ".tfw" in exts_found:
                    tfw = 1
                if ".xml" in exts_found:
                    xml = 1
                if ".html" in exts_found:
                    html = 1
                if ".txt" in exts_found:
                    txt = 1
                if ".tif.xml" in exts_found:
                    tif_xml = 1
                if ".tif.ovr" in exts_found:
                    ovr = 1
                if ".tif.aux.xml" in exts_found:
                    aux = 1
                if ".rrd" in exts_found:
                    rrd = 1
                if ".aux" in exts_found:
                    aux_old = 1
                if ".tif.crc" in exts_found:
                    crc = 1
                # anything found beyond the recognized sidecars
                extras = (
                    len(exts_found) - 1 - tfw - xml - html - txt - tif_xml
                    - ovr - aux - rrd - aux_old - crc
                )  # 1 for the tif that must exist
                out_row = [
                    path_in,
                    name,
                    ext,
                    size,
                    legacy,
                    nga,
                    kind,
                    edge,
                    cell,
                    lat,
                    lon,
                    tfw,
                    xml,
                    html,
                    txt,
                    tif_xml,
                    ovr,
                    aux,
                    rrd,
                    aux_old,
                    crc,
                    extras,
                    "N",
                ]
                csv23.write(csv_writer, out_row)
def load_csv(csv_path, col=0):
    """Return the values in column col of the CSV at csv_path, skipping the header."""
    values = []
    with csv23.open(csv_path, "r") as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # ignore the header
        for record in reader:
            values.append(csv23.fix(record)[col])
    return values