def run(self):
    """Build, or incrementally refresh, the external user data on HDFS.

    The available dates come from ``self.version_date()``.  Two paths exist:

    * ``self.local_version`` does not exist: ``self.user_root`` is recreated,
      the ``pr.user`` job is run and its ``self.merge`` result is renamed to
      the ``user`` output.
    * ``self.local_version`` exists: only the dates that are not listed in
      that file are copied from the first input into
      ``self.increamental_archive`` and the ``pr.latest.user`` job is run.

    In both cases every date is finally written, sorted and one per line, to
    the ``version`` output.

    Raises:
        Exception: if ``self.version_date()`` yields no dates, if there are
            no dates newer than the local version file, or if the job returns
            a non-zero exit code / fails the ``check_mr_success`` check.
    """
    hdfs = luigi.contrib.hdfs.hadoopcli_clients.create_hadoopcli_client()
    dates = self.version_date()
    if not dates:
        raise Exception("These's no user data in[%s]" % self.input()[0].fn)

    # Activate the user-specific job properties next to the job binary.
    mr_path = os.path.dirname(self.bin)
    copyfile("%s/mapreduce.properties.user" % mr_path,
             "%s/mapreduce.properties" % mr_path)

    if not os.path.exists(self.local_version):
        # No local version file yet: rebuild everything from scratch.
        hdfs.remove(self.user_root)
        hdfs.mkdir(self.user_root)
        exit_code = mr_cmd(self.bin, "pr.user")
        if exit_code != 0 or not check_mr_success(self.merge):
            raise Exception("GetExternalUser failed")
        hdfs.rename(self.merge, self.output()["user"].path)
    else:
        # Read the already-processed dates; the context manager guarantees
        # the file handle is closed instead of being left to the GC.
        with open(self.local_version) as local_fd:
            local_dates = [line.strip() for line in local_fd]
        latest_dates = set(dates) - set(local_dates)
        if not latest_dates:
            raise Exception("These's no new arrival user data")

        hdfs.remove(self.user_root)
        hdfs.mkdir(self.user_root)
        hdfs.mkdir(self.increamental_archive)
        # Stage only the new dates for the incremental job.
        input_root = self.input()[0].path
        for d in latest_dates:
            hdfs.copy("%s/%s" % (input_root, d),
                      "%s/%s" % (self.increamental_archive, d))
        exit_code = mr_cmd(self.bin, "pr.latest.user")
        if exit_code != 0 or not check_mr_success(self.output()["user"].path):
            raise Exception("GetExternalUser failed")

    # make version tag
    with self.output()["version"].open("w") as version_fd:
        dates.sort()
        for d in dates:
            print >> version_fd, d
def run(self):
    """Regenerate the external paper data by running the ``pr.paper`` job.

    ``self.paper_root`` is removed and recreated on HDFS, the ``.doc``
    properties file next to ``self.bin`` is copied over
    ``mapreduce.properties``, and the job is launched through ``mr_cmd``.

    Raises:
        Exception: if the job returns a non-zero exit code or
            ``check_mr_success`` rejects the task output path.
    """
    client = luigi.contrib.hdfs.hadoopcli_clients.create_hadoopcli_client()

    # Start from an empty paper directory.
    client.remove(self.paper_root)
    client.mkdir(self.paper_root)

    # Activate the paper-specific job properties next to the job binary.
    conf_dir = os.path.dirname(self.bin)
    doc_properties = "%s/mapreduce.properties.doc" % conf_dir
    active_properties = "%s/mapreduce.properties" % conf_dir
    copyfile(doc_properties, active_properties)

    status = mr_cmd(self.bin, "pr.paper")
    # The output is only inspected when the job itself reported success.
    job_ok = status == 0 and check_mr_success(self.output().path)
    if not job_ok:
        raise Exception("GetExternalPaper failed")
def to_hbase(data_path, jarbin):
    """Write ``DataCreate.xml`` beside ``jarbin`` and run ``DataCreateBehavior``.

    Args:
        data_path: value substituted into the ``input`` attribute of the
            generated ``<hbase>`` job element.
        jarbin: path of the job binary; the config file is written to its
            directory and it is passed to ``mr_cmd``.

    Raises:
        Exception: if ``mr_cmd`` returns a non-zero exit code.
    """
    conf_path = '%s/DataCreate.xml' % os.path.dirname(jarbin)
    # Adjacent literals are joined by the parser into one template string.
    job_template = (
        '<?xml version="1.0" encoding="UTF-8"?> '
        '<jobs> '
        '<hbase name="recommendation 2 hbase" '
        'hbasename="user_recommendation_info" '
        'regions="10" input="%s" method="create_hfile"/> '
        '</jobs>'
    )
    with open(conf_path, 'w') as out:
        print >> out, job_template % data_path

    # The config file is closed (and therefore flushed) before the job runs.
    status = mr_cmd(jarbin, "DataCreateBehavior")
    if status != 0:
        raise Exception('failed to write recommendation to hbase')