예제 #1
0
    def run(self):
        """Run the user MapReduce job and write the list of processed dates.

        Two modes, selected by whether ``self.local_version`` exists on the
        local filesystem:

        * It does not exist: ``self.user_root`` is recreated on HDFS, the
          ``pr.user`` job is run, and ``self.merge`` is renamed to the
          ``user`` output path.
        * It exists: the dates listed in it are subtracted from
          ``self.version_date()``; the remaining dates are copied from the
          first input into ``self.increamental_archive`` and the
          ``pr.latest.user`` job is run.

        In both modes every date from ``self.version_date()`` is finally
        written, sorted, one per line, to the ``version`` output.

        Raises:
            Exception: if there are no dates at all, if there are no dates
                missing from ``self.local_version``, or if the MapReduce job
                returns a non-zero exit code / fails ``check_mr_success``.
        """
        hdfs = luigi.contrib.hdfs.hadoopcli_clients.create_hadoopcli_client()
        dates = self.version_date()
        if not dates:
            raise Exception("These's no user data in[%s]" % self.input()[0].fn)

        # Install the user-specific properties file under the name the job
        # presumably reads (mapreduce.properties next to self.bin).
        mr_path = os.path.dirname(self.bin)
        copyfile("%s/mapreduce.properties.user" % mr_path, "%s/mapreduce.properties" % mr_path)

        if not os.path.exists(self.local_version):
            hdfs.remove(self.user_root)
            hdfs.mkdir(self.user_root)
            exit_code = mr_cmd(self.bin, "pr.user")
            if exit_code != 0 or not check_mr_success(self.merge):
                raise Exception("GetExternalUser failed")
            hdfs.rename(self.merge, self.output()["user"].path)
        else:
            # Close the local version file deterministically instead of
            # relying on garbage collection.
            with open(self.local_version) as local_fd:
                local_dates = [line.strip() for line in local_fd]
            latest_dates = set(dates) - set(local_dates)
            if not latest_dates:
                raise Exception("These's no new arrival user data")
            hdfs.remove(self.user_root)
            hdfs.mkdir(self.user_root)
            hdfs.mkdir(self.increamental_archive)
            for d in latest_dates:
                hdfs.copy("%s/%s" % (self.input()[0].path, d), "%s/%s" % (self.increamental_archive, d))
            exit_code = mr_cmd(self.bin, "pr.latest.user")
            if exit_code != 0 or not check_mr_success(self.output()["user"].path):
                raise Exception("GetExternalUser failed")

        # Make the version tag: one date per line, in sorted order.
        with self.output()["version"].open("w") as version_fd:
            dates.sort()
            for d in dates:
                # write() behaves the same on Python 2 and 3, unlike the
                # Python-2-only ``print >> fd`` statement.
                version_fd.write("%s\n" % (d,))
예제 #2
0
 def run(self):
     """Recreate ``self.paper_root`` on HDFS and run the ``pr.paper`` job.

     The ``mapreduce.properties.doc`` file next to ``self.bin`` is copied
     over ``mapreduce.properties`` before the job is launched.

     Raises:
         Exception: if the job exits non-zero or ``check_mr_success`` fails
             for this task's output path.
     """
     client = luigi.contrib.hdfs.hadoopcli_clients.create_hadoopcli_client()

     # Start from an empty paper root.
     client.remove(self.paper_root)
     client.mkdir(self.paper_root)

     bin_dir = os.path.dirname(self.bin)
     properties_src = "%s/mapreduce.properties.doc" % bin_dir
     properties_dst = "%s/mapreduce.properties" % bin_dir
     copyfile(properties_src, properties_dst)

     status = mr_cmd(self.bin, "pr.paper")
     # The success marker is only checked when the command itself succeeded.
     job_ok = status == 0 and check_mr_success(self.output().path)
     if not job_ok:
         raise Exception("GetExternalPaper failed")
예제 #3
0
def to_hbase(data_path, jarbin):
    """Write a ``DataCreate.xml`` job file and run ``DataCreateBehavior``.

    The job file is written into the directory containing ``jarbin`` and
    declares a single ``<hbase>`` job (``hbasename="user_recommendation_info"``,
    ``method="create_hfile"``) whose ``input`` attribute is ``data_path``.

    Args:
        data_path: value for the job's ``input`` attribute; it is
            XML-escaped so characters such as ``&``, ``<`` or ``"`` cannot
            produce a malformed configuration file.
        jarbin: path passed to ``mr_cmd``; its directory receives the
            generated ``DataCreate.xml``.

    Raises:
        Exception: if ``mr_cmd`` returns a non-zero exit code.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # Whitespace matches the original template exactly (continuation lines
    # start with two tab characters).
    conf_template = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '\t\t<jobs>\n'
        '\t\t<hbase name="recommendation 2 hbase" hbasename="user_recommendation_info" regions="10" input="%s" method="create_hfile"/>\n'
        '\t\t</jobs>'
    )
    # Format via a 1-tuple so a tuple-valued data_path cannot be mistaken
    # for multiple format arguments, then escape for use inside "...".
    input_attr = escape("%s" % (data_path,), {'"': "&quot;"})
    # Render before opening the file so a formatting error does not leave a
    # truncated configuration behind.
    conf_str = conf_template % (input_attr,)

    mr_conf_fn = '%s/DataCreate.xml' % os.path.dirname(jarbin)
    with open(mr_conf_fn, 'w') as conf_fd:
        # Same bytes as ``print >> conf_fd, conf_str`` but valid on Python 3.
        conf_fd.write(conf_str + "\n")

    exit_code = mr_cmd(jarbin, "DataCreateBehavior")
    if exit_code != 0:
        raise Exception('failed to write recommendation to hbase')