from datetime import date
from os.path import basename
from re import compile, search


def mbox_lister(search_date, quarters_path, fail_object=None):
    """mbox_lister(datetime.date, quarters_path, fail_object) --> set([string_1, ..., string_n])

    Searches for mailboxes belonging to the date received as argument.
    Returns a set with the paths of such mailboxes, or fail_object when
    no matching quarter directory or logfile is found."""
    # Verifies that the argument received is a "date" instance
    if not isinstance(search_date, date):
        raise TypeError('The argument "search_date" must be a date instance.')
    # Gets the quarter (1-4) which the received date belongs to;
    # datequarter() is a project-local helper assumed to return a zero-based index
    quarter = datequarter(search_date) + 1
    # Compiles a regular expression to match the quarter in the path, e.g. "2010-Q3"
    quarter_ex = compile(str(search_date.year) + "-Q" + str(quarter))
    # Searches for a directory that holds the calculated quarter; the for/else
    # clears quarter_path when no directory matches
    for quarter_path in recursive_walk(quarters_path, maxdepth=0, dirs_only=True):
        if search(quarter_ex, quarter_path):
            break
    else:
        quarter_path = ""
    # When there's no path for the determined quarter, returns the fail_object
    if quarter_path == "":
        return fail_object
    # Creates a list to store logfile paths
    logfile_list = list()
    # Searches for logfiles produced on the received date
    for logfile_path in recursive_walk(quarter_path, files_only=True):
        if basename(logfile_path) == "logfile." + str(search_date):
            logfile_list.append(logfile_path)
    # When no logfile path was found, returns the fail_object
    if len(logfile_list) == 0:
        return fail_object
    # Creates a set to store mailbox paths
    mailbox_set = set()
    # Reads the logfiles found above in search of mailboxes
    for logfile_path in logfile_list:
        mailbox_set |= logfile_mboxset(logfile_path)
    return mailbox_set
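# Usage sketch for mbox_lister, assuming the layout the code above implies:
# quarter directories named "<year>-Q<n>" under quarters_path, each holding
# files named "logfile.YYYY-MM-DD". The path below is purely hypothetical.
def _mbox_lister_demo():
    mailboxes = mbox_lister(date(2010, 8, 15), "/var/spool/quarters",
                            fail_object=set())
    # With fail_object=set(), the caller can iterate without a None check
    for mailbox_path in sorted(mailboxes):
        print(mailbox_path)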
# Requires "import re" at module level; configs, fileutils, blob, Annotation,
# Subject, Record and SingleDataset are project-local helpers.
def __init__(self, directory):
    # Builds a name -> id mapping from the configured mapping file, whose
    # lines are expected to look like "<numeric id> <subject name>"
    subject_mapping = {}
    with open(configs.MAPPING_FILE) as mapping_file:
        mapping_lines = mapping_file.read().strip().split('\n')
    for line in mapping_lines:
        id_str = re.findall(r'^(\d+)\s', line)[0]
        name = line[len(id_str) + 1:]
        subject_mapping[name] = id_str

    def getSubject(name):
        return int(subject_mapping[name])

    # self.dataset = SingleDataset()
    subject_dirs = fileutils.listdir(directory)
    ann = Annotation()
    prev_subject_id = None
    sjList = None
    for subject_dir in subject_dirs:
        subject_name = fileutils.dirname(subject_dir)
        subject_id = getSubject(subject_name)
        # Reuses an already loaded subject instead of creating a duplicate
        duplicate = None
        for sj in self.dataset.subjects:
            if sj.id == subject_id and sj.name == subject_name:
                duplicate = sj
        if duplicate is None:
            subject = Subject(subject_id, subject_name)
        else:
            subject = duplicate
        # Fetches the annotation list only when the subject changes
        if prev_subject_id != subject_id:
            sjList = ann.getSubjectList(subject_id)
            prev_subject_id = subject_id
        # Loads every file of the configured layer as a labelled record
        for file_path in fileutils.recursive_walk(subject_dir):
            if fileutils.fileextension(file_path) == configs.LAYER:
                filename = fileutils.filename(file_path)
                rc = Record()
                rc.data = blob.load_np_array(file_path)
                rc.label = int(Annotation.getClass(sjList, filename))
                rc.frame = filename
                subject.records.append(rc)
                print('Loading', subject_id, filename, rc.label)
        if duplicate is None:
            self.dataset.subjects.append(subject)
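# Usage sketch (assumptions: the __init__ above belongs to a loader class,
# here hypothetically called DatasetLoader, and self.dataset is a
# SingleDataset initialised elsewhere, as the commented-out line suggests).
# A mapping file line such as "3 subject_alice" yields {"subject_alice": "3"}.
#
#     loader = DatasetLoader("/data/subjects")
#     for subject in loader.dataset.subjects:
#         print(subject.id, subject.name, len(subject.records))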
from os.path import basename
from sys import stderr


def rsync_done_opener(main_path):
    """rsync_done_opener(string) --> list(file_object)

    Receives an initial path to search for "rsync_done" files.
    Opens each matching file and returns a list with the opened files."""
    # Creates a list to store the opened "rsync_done" files;
    # recursive_walk() and RSYNC_PATTERN are project-local helpers
    rsync_done_files = list()
    for file_path in recursive_walk(main_path, files_only=True):
        if basename(file_path) == RSYNC_PATTERN:
            # Opens each file, skipping the ones that cannot be opened
            try:
                rsync_done_files.append(open(file_path, "r"))
            except IOError:
                print("Invalid argument - [" + file_path + "] is not an openable file.",
                      file=stderr)
                continue
    return rsync_done_files
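# Usage sketch for rsync_done_opener. The function returns open file handles,
# so the caller is responsible for closing them; the path below is a
# hypothetical example.
def _rsync_done_demo():
    done_files = rsync_done_opener("/var/backups")
    for done_file in done_files:
        print(done_file.name, "->", done_file.read().strip())
        done_file.close()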