예제 #1
0
def fetch_users():
  """Load ASF JIRA profile data for every assignee into ``contributors``.

  Reads the distinct ``assignee`` values from the ``fixed_issues`` table,
  looks each one up through the ASF JIRA connection and inserts one row per
  user that JIRA returns. Names with no JIRA result are reported and
  skipped. A failed insert is reported and does not stop the run.
  """
  # Setup
  asf_jira = JiraConnection.make_connection_from_config('ASF')
  db_conn = PostgreSQLConnection.make_connection_from_config('POSTGRESQL')

  # The statement never changes, so build it once instead of per user.
  sql = """\
INSERT INTO contributors (asf_name, asf_fullname, asf_email, asf_email_domain)
VALUES (%(asf_name)s, %(asf_fullname)s, %(asf_email)s, %(asf_email_domain)s)
"""

  try:
    asf_usernames = [row[0] for row in db_conn.fetch_sql('SELECT DISTINCT(assignee) FROM fixed_issues;')]

    for name in asf_usernames:
      # EXTRACT
      if DEBUG: print("Begin processing name %s" % name)
      user_info = asf_jira.get_user(name)

      # TRANSFORM
      if not user_info:
        # Skip the LOAD step: without this, row_info would be unbound on the
        # first pass or would still hold the previous user's data.
        print("No results from ASF JIRA for %s" % name)
        continue

      # An address without '@' (or a missing address) has no domain; store
      # None rather than letting an IndexError abort the whole run.
      email_parts = user_info.email.split('@') if user_info.email else []
      row_info = {'asf_name': user_info.name,
                  'asf_fullname': user_info.fullname,
                  'asf_email': user_info.email,
                  'asf_email_domain': email_parts[1] if len(email_parts) > 1 else None}

      # LOAD
      try:
        db_conn.execute_sql(sql, row_info)
        if DEBUG: print("Successful insertion for %s" % name)
      except Exception as e:
        # Best-effort load: report the failure and move on to the next user.
        print("Problem inserting %s: %s" % (name, e))
  finally:
    # Teardown, mirroring fetch_fixed_issues; runs even if a lookup fails.
    asf_jira.close()
    db_conn.close()
예제 #2
0
def fetch_fixed_issues():
  """Fetch fixed issues from each configured source into ``fixed_issues``.

  For every source: fetch its issues, transform and serialize them with
  ``IssuesTransform``, pass the serialized data to ``write_tsv`` under the
  source's ``ofilename``, then load that file with a ``COPY`` statement.
  All three connections are closed on exit, whether or not a source fails;
  an exception raised while processing a source still propagates.
  """
  # Setup
  start_date = '2010-10-01'
  asf_jira = JiraConnection.make_connection_from_config('ASF')
  cloudera_jira = JiraConnection.make_connection_from_config('CLOUDERA')
  output_db = PostgreSQLConnection.make_connection_from_config('POSTGRESQL')

  try:
    # NOTE(review): the numeric argument to FilterSource is presumably a
    # saved JIRA filter id -- confirm against FilterSource.
    sources = [FilterSource(asf_jira, 'Avro', 12315050),
               FilterSource(asf_jira, 'Hadoop', 12315051),
               FilterSource(asf_jira, 'HDFS', 12315052),
               FilterSource(asf_jira, 'MapReduce', 12315053),
               FilterSource(asf_jira, 'HBase', 12315054),
               FilterSource(asf_jira, 'Hive', 12315055),
               FilterSource(asf_jira, 'Pig', 12315056),
               FilterSource(asf_jira, 'Whirr', 12315057),
               FilterSource(asf_jira, 'ZooKeeper', 12315058),
               FixedIssuesSource(cloudera_jira, 'Flume', start_date),
               FixedIssuesSource(cloudera_jira, 'Sqoop', start_date),
               FixedIssuesSource(cloudera_jira, 'Hue', start_date),
              ]

    for source in sources:
      if DEBUG: print("Begin processing source %s" % source.name)

      # EXTRACT
      issues = source.fetch_data()
      if DEBUG: print("Fetched %s issues for project %s" % (len(issues), source.name))

      # TRANSFORM
      transformed_issues = IssuesTransform.transform_issues(issues)
      if DEBUG: print("Transformed %s issues successfully for project %s" % (len(transformed_issues), source.name))
      serialized_issues = IssuesTransform.serialize_issues(transformed_issues)
      if DEBUG: print("Serialized issues: %s" % serialized_issues)
      write_tsv(source.ofilename, serialized_issues)

      # LOAD
      # NOTE(review): write_tsv receives the bare ofilename while COPY reads
      # from WORKING_DIR/ofilename -- confirm write_tsv writes into
      # WORKING_DIR. The path is interpolated into the SQL text; it comes
      # from the source objects built above, not from external input.
      output_db.execute_sql("COPY fixed_issues FROM '%s';" % os.path.join(WORKING_DIR, source.ofilename))
      if DEBUG: print("Loaded data for source %s" % source.name)
  finally:
    # Teardown: in a finally block so a failing source does not leak the
    # connections.
    asf_jira.close()
    cloudera_jira.close()
    output_db.close()