def update(self, interesting_turn=None): ''' Retrieve a turn from the dropbox either the current latest turn or a previous turn ''' self.common() if interesting_turn: self.latest_dirstr = self.archive_directory + os.sep + 'Turn ' + str(interesting_turn) if 'Turn ' + str(interesting_turn) not in os.listdir(self.game_dropbox + self.archive_directory): print "Can't find turn %s directory: '%s'" % (interesting_turn, self.game_dropbox + self.latest_dirstr) return if self.game_save_filename in os.listdir(self.game_dropbox + self.latest_dirstr): print 'found save for turn', max(self.turn_dictionary.keys()) if not interesting_turn is None or not self.player_turn_filename in os.listdir(self.game_dropbox + self.latest_dirstr) or raw_input('found my turn file already there... r u sure? [y]/[N] : ') == 'y': if self.get_save_name(self.latest_dirstr) in os.listdir(self.game_local): # retrieve the saved save with my turn choices embedded shutil.copy(self.game_local + os.sep + self.get_save_name(self.latest_dirstr), self.game_local + os.sep + self.game_save_filename) self.save_known_turn() elif self.game_save_filename in os.listdir(self.game_local) and filecmp._do_cmp(self.game_local + self.game_save_filename, self.game_dropbox + self.latest_dirstr + os.sep + self.game_save_filename) : # filecmp._do_cmp because I only care about the contents, no metadata shortcuts. print "got it already" else: shutil.copy(self.game_dropbox + self.latest_dirstr + os.sep + self.game_save_filename, self.game_local) if self.player_turn_filename in os.listdir(self.game_dropbox + self.latest_dirstr): shutil.copy(self.game_dropbox + self.latest_dirstr + os.sep + self.player_turn_filename, self.game_local) self.save_known_turn() print "copied"
def replace_with_symlinks(self, lang_dir):
    ' Replace all identical files with symlinks to save disk space/upload bandwidth '
    from calibre import walk
    base = self.a(lang_dir)
    for f in walk(base):
        r = os.path.relpath(f, base)
        orig = self.j(self.d(base), r)
        try:
            sz = os.stat(orig).st_size
        except EnvironmentError:
            # No counterpart file exists in the parent tree; nothing to link to.
            continue
        # Cheap size check first, then a full content comparison.
        # filecmp.cmp(shallow=False) is the public API for comparing file
        # contents; the previously used filecmp._do_cmp is a private
        # implementation detail that may change between Python releases.
        if sz == os.stat(f).st_size and filecmp.cmp(f, orig, shallow=False):
            os.remove(f)
            os.symlink(os.path.relpath(orig, self.d(f)), f)
def archive(self): notation = self.notation or hashlib.sha1(self.dataset).hexdigest() archive_path = os.path.join(SOURCE_DIRECTORY, 'archive', self.store.slug, notation.replace('/', '-')) archive_graph_name = rdflib.URIRef('{0}archive/{1}'.format(settings.GRAPH_BASE, notation)) data_dump_url = rdflib.URIRef('{0}archive/{1}/{2}/latest.rdf'.format(SOURCE_URL, self.store.slug, notation.replace('/', '-'))) if not os.path.exists(archive_path): os.makedirs(archive_path, 0755) nt_fd, nt_name = tempfile.mkstemp('.nt') rdf_fd, rdf_name = tempfile.mkstemp('.rdf') try: nt_out, rdf_out = os.fdopen(nt_fd, 'w'), os.fdopen(rdf_fd, 'w') for graph_name in self.graph_names: self._graph_triples(nt_out, graph_name) nt_out.close() sort = subprocess.Popen(['sort', '-u', nt_name], stdout=subprocess.PIPE) try: triples = itertools.chain(self._get_metadata(rdflib.URIRef(''), archive_graph_name), parse(sort.stdout, 'nt').get_triples()) serialize(triples, rdf_out, rdf_name) finally: # Make sure stdout gets closed so that if the try block raises # an exception we don't keep a sort process hanging around. sort.stdout.close() sort.wait() rdf_out.close() previous_name = os.path.join(archive_path, 'latest.rdf') # Only update if the file has changed, or hasn't been archived before. if not os.path.exists(previous_name) or not filecmp._do_cmp(previous_name, rdf_name): new_name = os.path.join(archive_path, self.updated.astimezone(pytz.utc).isoformat() + '.rdf') shutil.move(rdf_name, new_name) os.chmod(new_name, 0644) if os.path.exists(previous_name): os.unlink(previous_name) os.symlink(new_name, previous_name) # Upload the metadata to the store using an absolute URI. metadata = self._get_metadata(data_dump_url, archive_graph_name) Uploader.upload([self.store], archive_graph_name, graph=metadata) finally: os.unlink(nt_name) if os.path.exists(rdf_name): os.unlink(rdf_name) self.filter_old_archives(archive_path)
def update_dataset_archive(dataset, store, graph_names, updated): dataset_id = dataset.rsplit('/', 1)[1] archive_path = os.path.join(settings.ARCHIVE_PATH, store.slug, dataset_id) if not os.path.exists(archive_path): os.makedirs(archive_path, 0755) nt_fd, nt_name = tempfile.mkstemp('.nt') rdf_fd, rdf_name = tempfile.mkstemp('.rdf') try: nt_out, rdf_out = os.fdopen(nt_fd, 'w'), os.fdopen(rdf_fd, 'w') for graph in graph_names: _graph_triples(nt_out, graph) nt_out.close() sort = subprocess.Popen(['sort', '-u', nt_name], stdout=subprocess.PIPE) with open(rdf_out, 'w') as sink: RDFXMLSink(NTriplesSource(sort.stdout)).serialize(sink) sort.wait() rdf_out.close() previous_name = os.path.join(archive_path, 'latest.rdf') if not os.path.exists(previous_name) or not filecmp._do_cmp(previous_name, rdf_name): new_name = os.path.join(archive_path, updated.astimezone(pytz.utc).isoformat() + '.rdf') shutil.move(rdf_name, new_name) os.chmod(new_name, 0644) if os.path.exists(previous_name): os.unlink(previous_name) os.symlink(new_name, previous_name) finally: os.unlink(nt_name) if os.path.exists(rdf_name): os.unlink(rdf_name)
def update_event(self, inp=-1):
    """Compare the two input file paths by content and emit the result.

    Sets output 0 to True when the files at input 0 and input 1 have
    identical contents.  Uses the public ``filecmp.cmp`` with
    ``shallow=False`` (full content comparison) instead of the private
    ``filecmp._do_cmp`` helper, which is an implementation detail and may
    change between Python releases.
    """
    self.set_output_val(0, filecmp.cmp(self.input(0), self.input(1), shallow=False))
def archive(self): notation = self.notation or hashlib.sha1(self.dataset).hexdigest() archive_path = os.path.join(SOURCE_DIRECTORY, 'archive', self.store.slug, notation.replace('/', '-')) archive_graph_name = rdflib.URIRef('{0}archive/{1}'.format(settings.GRAPH_BASE, notation)) data_dump_url = rdflib.URIRef('{0}archive/{1}/{2}/latest.rdf'.format(SOURCE_URL, self.store.slug, notation.replace('/', '-'))) data_dump_with_labels_url = rdflib.URIRef('{0}archive/{1}/{2}/latest-with-labels.rdf'.format(SOURCE_URL, self.store.slug, notation.replace('/', '-'))) if not os.path.exists(archive_path): os.makedirs(archive_path, 0755) nt_fd, nt_name = tempfile.mkstemp('.nt') rdf_fd, rdf_name = tempfile.mkstemp('.rdf') rdf_with_labels_fd, rdf_with_labels_name = tempfile.mkstemp('.rdf') try: nt_out, rdf_out = os.fdopen(nt_fd, 'w'), os.fdopen(rdf_fd, 'w') rdf_with_labels_out = os.fdopen(rdf_with_labels_fd, 'w') for graph_name in self.graph_names: self._graph_triples(nt_out, graph_name) nt_out.close() with tempfile.TemporaryFile() as sorted_triples: subprocess.call(['sort', '-u', nt_name], stdout=sorted_triples) sorted_triples.seek(0) triples = itertools.chain(self._get_metadata(rdflib.URIRef(''), data_dump_with_labels_url, archive_graph_name), parse(sorted_triples, 'nt').get_triples()) serialize(triples, rdf_out, 'rdf') rdf_out.close() sorted_triples.seek(0) triples = itertools.chain(self._get_metadata(rdflib.URIRef(''), data_dump_with_labels_url, archive_graph_name), self.with_labels(parse(sorted_triples, 'nt').get_triples())) serialize(triples, rdf_with_labels_out, 'rdf') rdf_with_labels_out.close() previous_name = os.path.join(archive_path, 'latest.rdf') # Only update if the file has changed, or hasn't been archived before. 
if not os.path.exists(previous_name) or not filecmp._do_cmp(previous_name, rdf_name): new_name = os.path.join(archive_path, self.updated.astimezone(pytz.utc).isoformat() + '.rdf') shutil.move(rdf_name, new_name) os.chmod(new_name, 0644) if os.path.exists(previous_name): os.unlink(previous_name) os.symlink(new_name, previous_name) new_with_labels_name = os.path.join(archive_path, 'latest-with-labels.rdf') shutil.move(rdf_with_labels_name, new_with_labels_name) os.chmod(new_with_labels_name, 0644) # Upload the metadata to the store using an absolute URI. metadata = self._get_metadata(data_dump_url, data_dump_with_labels_url, archive_graph_name) Uploader.upload([self.store], archive_graph_name, graph=metadata) finally: os.unlink(nt_name) if os.path.exists(rdf_name): os.unlink(rdf_name) self.filter_old_archives(archive_path)