Beispiel #1
0
def main():
    """Convert the feed named by ``fname`` into validated, archived flat files.

    An XML feed is processed directly.  A compressed or archived feed is
    first unpacked into ``TEMP_DIR`` and flattened, and the first ``.xml``
    file found there is processed instead.  The resulting files are then
    validated against the schema fetched from ``SCHEMA_URL`` and the valid
    ones are written out and archived.
    """
    file_types = FileType()

    setup_dir(TEMP_DIR)

    feed_type = file_types.get_type(fname)

    flattener = FeedToFlatFiles(TEMP_DIR)

    if feed_type == "xml":
        flattener.process_feed(fname)
    elif file_types.is_compression(feed_type) or file_types.is_archived(feed_type):
        unpacker = Unpack(fname, TEMP_DIR)
        unpacker.flatten_folder()
        extracted_xml = unpacker.find_file_by_extension(".xml")
        # Only process the feed when the unpacked data actually held an XML file.
        if extracted_xml:
            flattener.process_feed(extracted_xml)

    checker = FormatCheck(urlopen(SCHEMA_URL), TEMP_DIR)
    checker.validate_and_clean()
    # TODO: need to get error report here

    valid_files = checker.get_valid_files()
    vip_id = checker.get_vip_id()
    write_and_archive(valid_files, vip_id)
Beispiel #2
0
class Unpack:
    """Recursively unpack a compressed/archived file or a directory of them.

    On construction the input is (optionally) copied into ``extract_path``
    and every compressed or archived file found is expanded in place until
    nothing unpackable is left.  The remaining methods search and tidy the
    resulting tree rooted at ``self.extract_path``.
    """

    def __init__(self, file_name, extract_path=None):
        """Copy ``file_name`` into ``extract_path`` (if given) and unpack it.

        Args:
            file_name: path of a file or directory to unpack.
            extract_path: optional working directory.  It is created when
                missing (and ``file_name`` exists).  When omitted, a
                directory input is unpacked in place and any other input
                makes ``self.extract_path`` the current working directory.
        """
        self.ft = FileType()

        self.extract_path = extract_path

        if os.path.exists(file_name) and self.extract_path and not os.path.exists(self.extract_path):
            os.makedirs(self.extract_path)

        if os.path.isdir(file_name) and self.extract_path:
            # The destination always exists at this point, and
            # shutil.copytree() refuses to copy into an existing directory,
            # so merge the contents instead.
            self._copy_dir_contents(file_name, self.extract_path)
            file_name = self.extract_path
        elif os.path.isfile(file_name) and self.extract_path:
            shutil.copy(file_name, self.extract_path)
            file_name = self.extract_path
        elif os.path.isdir(file_name):
            self.extract_path = file_name
        else:
            self.extract_path = os.getcwd()

        if os.path.isdir(file_name):
            self.unpack_dir(file_name)
        else:
            self.unpack_file(file_name)

    @staticmethod
    def _copy_dir_contents(src, dst):
        """Copy everything below ``src`` into the (possibly existing) ``dst``."""
        if os.path.abspath(src) == os.path.abspath(dst):
            # Nothing to copy; copying a file onto itself would fail.
            return
        for root, dirs, files in os.walk(src):
            rel = os.path.relpath(root, src)
            target = dst if rel == os.curdir else os.path.join(dst, rel)
            if not os.path.isdir(target):
                os.makedirs(target)
            for name in files:
                shutil.copy2(os.path.join(root, name), os.path.join(target, name))

    @staticmethod
    def _first(matches):
        """Return the first element of ``matches``, or None when there is none."""
        return matches[0] if matches else None

    def uncompress(self, fname):
        """Decompress a gzip or bzip2 file next to the original.

        The output name is ``fname`` without its last extension.  If the
        file-type detector reports a type for the decompressed file, that
        type is appended as a new extension.

        Returns:
            The path of the decompressed file.

        Raises:
            ValueError: if the detected type is neither "gz" nor "bz2".
        """
        ftype = self.ft.get_type(fname)

        if ftype == "gz":
            opener = GzipFile
        elif ftype == "bz2":
            opener = BZ2File
        else:
            raise ValueError("unsupported compression type %r for %s" % (ftype, fname))

        # splitext() only looks at the final path component, so a dot in a
        # directory name cannot truncate the path; with no extension at all
        # a suffix keeps the output from clobbering the input.
        base, extension = os.path.splitext(fname)
        new_name = base if extension else fname + "_uncompressed"

        src = opener(fname, 'rb')
        try:
            # Binary mode: the decompressed payload is bytes.  The file is
            # closed (and therefore flushed) before its type is inspected.
            with open(new_name, "wb") as dst:
                shutil.copyfileobj(src, dst)
        finally:
            src.close()

        new_type = self.ft.get_type(new_name)
        if new_type:
            typed_name = new_name + "." + new_type
            os.rename(new_name, typed_name)
            return typed_name
        return new_name

    def unarchive(self, fname):
        """Extract a rar, tar or zip archive into ``<name>_extracted``.

        Returns:
            The path of the directory the archive was extracted into.

        Raises:
            ValueError: if the detected type is not "rar", "tar" or "zip".
        """
        ftype = self.ft.get_type(fname)

        if ftype == "rar":
            ext = RarFile(fname)
        elif ftype == "tar":
            ext = tarfile.open(fname)
        elif ftype == "zip":
            ext = ZipFile(fname)
        else:
            raise ValueError("unsupported archive type %r for %s" % (ftype, fname))

        new_path = os.path.splitext(fname)[0] + "_extracted"
        try:
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            # NOTE(review): extractall() trusts member paths; archives from
            # untrusted sources can write outside new_path.  Consider
            # validating member names before extracting.
            ext.extractall(path=new_path)
        finally:
            ext.close()
        return new_path

    def unpack_dir(self, directory):
        """Unpack every file found anywhere below ``directory``."""
        for root, dirs, dirfiles in os.walk(directory):
            for name in dirfiles:
                self.unpack_file(os.path.join(root, name))

    def unpack_file(self, fname):
        """Expand ``fname`` if it is compressed or archived, recursively.

        The original file is removed once expanded, and the result is
        unpacked again so nested archives are fully expanded.  Files that
        are neither compressed nor archived are left untouched.
        """
        # NOTE(review): the detector is handed a path here; confirm that
        # is_compression()/is_archived() accept a path rather than a type name.
        if self.ft.is_compression(fname):
            new_file = self.uncompress(fname)
        elif self.ft.is_archived(fname):
            new_file = self.unarchive(fname)
        else:
            return

        if fname != new_file:
            os.remove(fname)

        if os.path.isdir(new_file):
            self.unpack_dir(new_file)
        else:
            self.unpack_file(new_file)

    def find_file_by_name(self, file_name):
        """Return the first file whose name matches the regex, or None."""
        return self._first(self.find_files_by_name(file_name))

    def find_files_by_name(self, file_name):
        """Return files whose name matches ``file_name`` (a regex), or None."""
        return self.find_files(re.compile(file_name))

    def find_file_by_extension(self, file_extension):
        """Return the first file with the given extension, or None."""
        return self._first(self.find_files_by_extension(file_extension))

    def find_files_by_extension(self, file_extension):
        """Return files ending in ``file_extension``, or None if there are none.

        The leading dot is optional.  The extension is matched literally
        and only at the end of the file name.
        """
        if not file_extension.startswith("."):
            file_extension = "." + file_extension
        return self.find_files(re.compile(".*" + re.escape(file_extension) + r"\Z"))

    def find_file_by_partial(self, file_partial):
        """Return the first file whose name contains the pattern, or None."""
        return self._first(self.find_files_by_partial(file_partial))

    def find_files_by_partial(self, file_partial):
        """Return files whose name contains ``file_partial`` (a regex), or None."""
        return self.find_files(re.compile(".*" + file_partial + ".*"))

    def find_files(self, regex):
        """Return paths of all files under ``extract_path`` matching ``regex``.

        ``regex`` is applied with ``match()`` to the bare file name.

        Returns:
            A non-empty list of paths, or None when nothing matches.
        """
        file_list = []

        for root, dirs, dirfiles in os.walk(self.extract_path):
            for name in dirfiles:
                # NOTE(review): ``name`` is a bare file name, so the "/."
                # test does not filter hidden directories; that would need
                # to look at ``root``.  Kept as-is to preserve results.
                if regex.match(name) and "/." not in name:
                    file_list.append(os.path.join(root, name))
        if file_list:
            return file_list
        return None

    def get_file_list(self):
        """Return every file under ``extract_path``, or None if there are none."""
        return self.find_files(re.compile(".*"))

    def find_folder_by_name(self, folder_name):
        """Return the first folder whose name matches the regex, or None."""
        return self.find_folder(re.compile(folder_name))

    def find_folder_by_partial(self, folder_partial):
        """Return the first folder whose name contains the pattern, or None."""
        return self.find_folder(re.compile(".*" + folder_partial + ".*"))

    def find_folder(self, regex):
        """Return the first folder under ``extract_path`` matching ``regex``.

        Returns None when no folder matches.
        """
        for root, dirs, dirfiles in os.walk(self.extract_path):
            for dir_name in dirs:
                if regex.match(dir_name) and "/." not in dir_name:
                    return os.path.join(root, dir_name)
        return None

    def get_folder_list(self):
        """Return every folder under ``extract_path``, or None if there are none."""
        folder_list = []

        for root, dirs, dirfiles in os.walk(self.extract_path):
            for dir_name in dirs:
                folder_list.append(os.path.join(root, dir_name))
        if folder_list:
            return folder_list
        return None

    def flatten_folder(self):
        """Move every nested file up into ``extract_path`` and drop the subfolders.

        Files sharing a name end up at the same destination path, so at
        most one of them survives the flattening.

        Raises:
            SameDirError: if ``extract_path`` is the current working
                directory, which must not be flattened.
        """
        if self.extract_path == os.getcwd():
            raise SameDirError

        for root, dirs, files in os.walk(self.extract_path):
            if root != self.extract_path:
                for name in files:
                    shutil.move(os.path.join(root, name), os.path.join(self.extract_path, name))

        for root, dirs, files in os.walk(self.extract_path):
            for d in dirs:
                shutil.rmtree(os.path.join(root, d))
            # Removing the top-level folders removes everything below them,
            # so there is nothing left to walk.
            break