예제 #1
0
def save_model(args, user_map, item_map, row_factor, col_factor):
  """Save the user map, item map, row factor and column factor matrices in numpy format.

  These matrices together constitute the "recommendation model."

  The arrays are written as user.npy, item.npy, row.npy and col.npy into
  <output_dir>/model. When that path is a GCS URL the files are first
  written to /tmp/<job_name> and then copied to GCS with gsutil.

  Args:
    args:         input args to training job; a mapping providing
                  'output_dir' and, for GCS output, 'job_name'
    user_map:     user map numpy array
    item_map:     item map numpy array
    row_factor:   row_factor numpy array
    col_factor:   col_factor numpy array
  """
  model_dir = os.path.join(args['output_dir'], 'model')

  # if our output directory is a GCS bucket, write model files to /tmp,
  # then copy to GCS
  gs_model_dir = None
  if model_dir.startswith('gs://'):
    gs_model_dir = model_dir
    model_dir = '/tmp/{0}'.format(args['job_name'])

  # os.makedirs raises if the directory already exists (e.g. a re-run with
  # the same job name), so only create it when it is missing
  if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

  np.save(os.path.join(model_dir, 'user'), user_map)
  np.save(os.path.join(model_dir, 'item'), item_map)
  np.save(os.path.join(model_dir, 'row'), row_factor)
  np.save(os.path.join(model_dir, 'col'), col_factor)

  if gs_model_dir:
    sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
def save_model(args, user_map, item_map, row_factor, col_factor):
  """Save the user map, item map, row factor and column factor matrices in numpy format.

  These matrices together constitute the "recommendation model."

  Each array is stored with np.save under <output_dir>/model as user.npy,
  item.npy, row.npy and col.npy. For a GCS output directory the files are
  staged in /tmp/<job_name> and then copied to the bucket with gsutil.

  Args:
    args:         input args to training job; indexed by 'output_dir' and,
                  when the output is on GCS, 'job_name'
    user_map:     user map numpy array
    item_map:     item map numpy array
    row_factor:   row_factor numpy array
    col_factor:   col_factor numpy array
  """
  model_dir = os.path.join(args['output_dir'], 'model')

  # if our output directory is a GCS bucket, write model files to /tmp,
  # then copy to GCS
  gs_model_dir = None
  if model_dir.startswith('gs://'):
    gs_model_dir = model_dir
    model_dir = '/tmp/{0}'.format(args['job_name'])

  # tolerate a directory left over from an earlier run instead of letting
  # os.makedirs raise on it
  if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

  arrays = (
      ('user', user_map),
      ('item', item_map),
      ('row', row_factor),
      ('col', col_factor),
  )
  for name, array in arrays:
    np.save(os.path.join(model_dir, name), array)

  if gs_model_dir:
    sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
예제 #3
0
def save_model(args, user_map, item_map, row_factor, col_factor):
    """Save the user map, item map, row factor and column factor matrices in numpy format.

    These matrices together constitute the "recommendation model."

    The arrays are always written to /tmp/<job_name> first and then copied
    with gsutil to <output_dir>/model.

    Args:
        args:         input args to training job; must expose the
                      attributes output_dir and job_name
        user_map:     user map numpy array
        item_map:     item map numpy array
        row_factor:   row_factor numpy array
        col_factor:   col_factor numpy array
    """

    model_dir = os.path.join(args.output_dir, 'model')

    # write model files to /tmp, then copy to GCS
    gs_model_dir = model_dir
    model_dir = '/tmp/{0}'.format(args.job_name)

    # the staging directory may survive from an earlier run with the same
    # job name; os.makedirs would raise on it, so create it only if missing
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    np.save(os.path.join(model_dir, 'user'), user_map)
    np.save(os.path.join(model_dir, 'item'), item_map)
    np.save(os.path.join(model_dir, 'row'), row_factor)
    np.save(os.path.join(model_dir, 'col'), col_factor)

    sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
예제 #4
0
def save_model(args, user_map, item_map, row_factor, col_factor,
               item_ID_mapping_dd):
    """Save the model matrices in numpy format and the item ID mapping as CSV.

    These matrices together constitute the "recommendation model."

    Files are written to <output_dir>/model: user.npy, item.npy, row.npy,
    col.npy and item_ID_mapping_dd.csv. When that path is a GCS URL the
    files are staged in /tmp/<job_name> and then copied to GCS with gsutil.

    Inputs:
        args:         input args to training job; must expose the
                      attributes output_dir and, for GCS output, job_name
        user_map:     user map numpy array
        item_map:     item map numpy array
        row_factor:   row_factor numpy array
        col_factor:   col_factor numpy array
        item_ID_mapping_dd: original item ID to rebased item ID mapping;
                      must provide a to_csv(path) method
                      (presumably a DataFrame -- confirm with caller)
    """

    model_dir = os.path.join(args.output_dir, 'model')

    # if our output directory is a GCS bucket, write model files to /tmp,
    # then copy to GCS
    gs_model_dir = None
    if model_dir.startswith('gs://'):
        gs_model_dir = model_dir
        model_dir = '/tmp/{0}'.format(args.job_name)

    # os.makedirs raises when the directory already exists (re-run with the
    # same job name / output dir), so only create it when it is missing
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    np.save(os.path.join(model_dir, 'user'), user_map)
    np.save(os.path.join(model_dir, 'item'), item_map)
    np.save(os.path.join(model_dir, 'row'), row_factor)
    np.save(os.path.join(model_dir, 'col'), col_factor)
    item_ID_mapping_dd.to_csv(os.path.join(model_dir,
                                           'item_ID_mapping_dd.csv'))

    if gs_model_dir:
        sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
예제 #5
0
def ensure_local_file(input_file):
  """Ensure the training ratings file is stored locally.

  Args:
    input_file: path of the ratings file. Paths starting with 'gs:/' are
                treated as GCS objects; anything else is taken to be local.

  Returns:
    input_file unchanged when it is not a GCS path; otherwise the path of
    a copy fetched with gsutil into a new, uniquely named directory under
    /tmp.
  """
  # local import so this block does not depend on a module-level import
  import uuid

  if not input_file.startswith('gs:/'):
    return input_file

  # a fresh UUID-named directory keeps separate downloads from colliding
  input_path = os.path.join('/tmp/', str(uuid.uuid4()))
  os.makedirs(input_path)
  tmp_input_file = os.path.join(input_path, os.path.basename(input_file))
  sh.gsutil("cp", "-r", input_file, tmp_input_file)
  return tmp_input_file
예제 #6
0
def ensure_local_file(input_file):
  """Return a local path to the training ratings file.

  A path that does not start with 'gs:/' is handed back unchanged.
  Otherwise the file is copied with gsutil into a newly created, uniquely
  named directory under /tmp and the path of that copy is returned.
  """
  if not input_file.startswith('gs:/'):
    return input_file

  # a random UUID gives every download its own directory
  download_dir = os.path.join('/tmp/', str(uuid.uuid4()))
  os.makedirs(download_dir)
  local_copy = os.path.join(download_dir, os.path.basename(input_file))
  sh.gsutil("cp", "-r", input_file, local_copy)
  return local_copy
def ensure_local_file(input_file):
  """Make sure the training ratings file is available on local disk.

  Inputs starting with 'gs:/' are copied with gsutil into a new
  /tmp/<uuid4> directory and the copy's path is returned; every other
  input is returned as given.
  """
  is_remote = input_file.startswith('gs:/')
  if is_remote:
    staging_dir = os.path.join('/tmp/', str(uuid.uuid4()))
    os.makedirs(staging_dir)
    file_name = os.path.basename(input_file)
    result = os.path.join(staging_dir, file_name)
    sh.gsutil("cp", "-r", input_file, result)
  else:
    result = input_file
  return result
def save_model(r_factor, c_factor, job_dir='.', job_name='myjob'):
    """Save the row and column factor arrays as row.npy and col.npy.

    The files go to <job_dir>/model. If that path is a GCS URL they are
    written to /tmp/<job_name> instead and then copied to GCS with gsutil.

    Args:
        r_factor: row factor array, stored as 'row'
        c_factor: column factor array, stored as 'col'
        job_dir:  base output directory (local path or gs:// URL)
        job_name: names the /tmp staging directory used for GCS output
    """
    remote_dir = None
    local_dir = os.path.join(job_dir, 'model')
    if local_dir.startswith('gs://'):
        # stage locally; remember where the files finally belong
        remote_dir, local_dir = local_dir, '/tmp/{0}'.format(job_name)

    if not os.path.exists(local_dir):
        os.makedirs(local_dir)

    for file_stem, factor in (('row', r_factor), ('col', c_factor)):
        np.save(os.path.join(local_dir, file_stem), factor)

    if remote_dir:
        # imported lazily so purely local runs do not need the sh package
        import sh
        sh.gsutil('cp', '-r', os.path.join(local_dir, '*'), remote_dir)
예제 #9
0
def save_model(r_factor, c_factor, job_dir='.', job_name='myjob'):
    """Write the row/column factor arrays to <job_dir>/model in numpy format.

    r_factor is saved as row.npy and c_factor as col.npy. For a gs://
    destination the files are first written to /tmp/<job_name> and then
    copied to the bucket with gsutil.
    """
    destination = os.path.join(job_dir, 'model')
    needs_upload = destination.startswith('gs://')
    if needs_upload:
        write_dir = '/tmp/{0}'.format(job_name)
    else:
        write_dir = destination

    if not os.path.exists(write_dir):
        os.makedirs(write_dir)

    row_path = os.path.join(write_dir, 'row')
    np.save(row_path, r_factor)
    col_path = os.path.join(write_dir, 'col')
    np.save(col_path, c_factor)

    if needs_upload:
        # sh is only required when copying to GCS
        import sh
        everything = os.path.join(write_dir, '*')
        sh.gsutil('cp', '-r', everything, destination)
예제 #10
0
def save_user_items_w(args, user_items_w):
  """Save user_items_w as user_item_w.json (records orientation).

  The file is written to <output_dir>/model. When that path is a GCS URL
  the file is written to /tmp/<job_name> and the contents of that
  directory are then copied to GCS with gsutil.

  Args:
    args:          input args to training job; a mapping providing
                   'output_dir' and, for GCS output, 'job_name'
    user_items_w:  object providing to_json(path, orient='records')
                   (presumably a pandas DataFrame -- confirm with caller)
  """
  model_dir = os.path.join(args['output_dir'], 'model')

  # if our output directory is a GCS bucket, write model files to /tmp,
  # then copy to GCS
  gs_model_dir = None
  if model_dir.startswith('gs://'):
    gs_model_dir = model_dir
    model_dir = '/tmp/{0}'.format(args['job_name'])

  # the directory may or may not have been created by an earlier save;
  # create it only when missing so the write below cannot fail on a
  # non-existent directory and a pre-existing one is not an error
  if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

  user_items_w.to_json(os.path.join(model_dir, 'user_item_w.json'),
                       orient='records')

  if gs_model_dir:
    sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
예제 #11
0
 def move(self, path_to_src, path_to_dest):
     """Move a single object, or the entries directly below a directory.

     Both paths are expected to start with '/'; that first character is
     stripped to form the bucket key. If a key exists for the source it is
     copied to the destination and deleted. Otherwise the source is treated
     as a directory: every listed sub-directory is re-created below the
     destination, every listed file is copied there, and finally everything
     under the source prefix is removed with ``gsutil rm``.

     NOTE(review): the listing uses '/' as delimiter, so only direct
     children appear to be copied, while the final ``rm`` pattern ends in
     '**'. Confirm that contents of nested directories are not lost.
     """
     self.logger.info("moving %s to %s", path_to_src, path_to_dest)
     path_to_dest = path_to_dest[1:]  # remove / from beginning
     path_to_src = path_to_src[1:]
     k = self.bucket.get_key(path_to_src)
     # assume that src is a directory, if the key does not exist
     # a more secure way would be to check if the directory exists by
     # appending a slash to the path and trying to get it
     is_dir = k is None
     if not is_dir:
         k.copy(self.bucket, path_to_dest)
         k.delete()
         return
     path_to_dest += '/'
     path_to_src += '/'
     # materialise once: the result is partitioned in two passes below
     listing = list(self.bucket.list(path_to_src, '/'))
     directories = [d for d in listing if self._is_dir(d)]
     # test each entry itself; the original tested the stale loop
     # variable `d` here instead of `f`
     files = [f for f in listing if not self._is_dir(f)]
     for key in directories:
         new_path = path_to_dest + key.name.split(path_to_src, 1)[1]
         # drop the last character (presumably a trailing '/') and
         # restore the leading '/'
         self.create_directory('/' + new_path[:-1])
     for key in files:
         new_path = path_to_dest + key.name.split(path_to_src, 1)[1]
         key.copy(self.bucket, new_path)
     gsutil('rm', 'gs://%s/%s**' % (self.bucket_name, path_to_src))
예제 #12
0
 def get_used_space(self):
     """Return the first column of ``gsutil du -s`` for the bucket as an int.

     NOTE(review): the bucket URL is hard-coded to 'gs://cloudfusion';
     confirm whether it should follow the configured bucket instead.
     """
     self.logger.debug("retrieving used space")
     du_output = gsutil('du', '-s', 'gs://cloudfusion')
     # keep only the first whitespace-separated field of the summary line
     size_field = awk(du_output, '{print $1}')
     return int(size_field)