Beispiel #1
0
               age_to_plug  = 0
             else:
               age_to_plug = int(formatted_res_age)

             #print ">", age_to_plug
             age_list_.append(age_to_plug)
             age_to_plug=0
             formatted_res_age=''
  tupToRet =(age_list_, country_list_)
  return tupToRet





# Script body: report how many users have a reputation value, print summary
# statistics for those values, then select the "high reputation" users.
print "Started at: ", utility.giveTimeStamp()

# Fetch the reputation records and post-process them with the 'REP' key.
# NOTE(review): the exact shape returned by preProcessQuestionInfo is not
# visible here; it is used below as a sized sequence.
reputationList = preProcessQuestionInfo(getReputations(), 'REP')
print "******************************"
### Reputation
print "Count of users with reputation"
print len(reputationList)
print "Reputation stats:"
st_ = utility.giveStat(reputationList)
print st_
# The element at index 3 of the stats result is used as the cut-off for
# "high" reputation. NOTE(review): which statistic lives at index 3 is
# defined by utility.giveStat -- confirm there.
lh_threhold=st_[3]

# high reputation
# Users returned by getHighUsers for that threshold, post-processed with the
# 'Id' key and then sorted in place.
high_users = preProcessQuestionInfo(getHighUsers(lh_threhold), 'Id')
high_users.sort()
print "Count of highly reputed users"
Beispiel #2
0
  convert fitted matrix to pandas dataframe: not using CSV, dumping list of strings as pickle in line#80
  '''
    # df_ = utility.dumpTransformedTokenMatrixToCSV(transformed_features, feature_names, reproc_dump_output_file)
    # print df_.shape
    # print 'Dumping completed ...'
    '''
  and then call prediction module
  '''
    tokenization_predictor.performPrediction(iterDumpDir, all_features, labels,
                                             feature_names,
                                             count_vec_flag_param)
    print "=" * 100


# Script entry point: runs only when this file is executed directly.
if __name__ == '__main__':
    # Print the start timestamp followed by a 125-character separator line.
    print "Started at", utility.giveTimeStamp()
    print "-" * 125
    # Hard-coded, machine-specific directory path.
    # NOTE(review): presumably where results are written -- its use is not
    # visible in this part of the file.
    dir2save = '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/output/'

    # dataset_file="/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/dataset/MIRANTIS_FULL_DATASET.csv"
    # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MIR.dump'
    # theCompleteCategFile='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Mirantis_Categ_For_DB.csv'
    # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MIR.csv'

    # dataset_file="/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/dataset/SYNTHETIC_MOZ_FULL_DATASET.csv"
    # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MOZILLA.dump'
    # theCompleteCategFile='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Mozilla.Final.Categ.csv'
    # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MOZILLA.csv'

    # dataset_file="/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/dataset/SYNTHETIC_OPENSTACK_FULL_DATASET.csv"
    # reproc_dump_output_file='/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_OPENSTACK.dump'
Beispiel #3
0
      if val_=='':
        countto_ret = 0.0
      else:
        countto_ret= float(val_)
    return countto_ret


def getIssueReportsList(author_id_param_list):
    '''
    Build a list with one issue-report count per author, in input order.

    For each author id in author_id_param_list, the raw result of
    getIssueReportFromDB is passed through preProcessCount and the
    resulting value is collected into a new list, which is returned.
    '''
    return [
        preProcessCount(getIssueReportFromDB(single_author_id))
        for single_author_id in author_id_param_list
    ]



# Script body: for the female and the male author groups, collect one
# issue-report count per author, dump each list via utility.dumpContent and
# print the totals.
print "Starting at:", utility.giveTimeStamp()
# Load and post-process the author ids of each group.
allTheFemales = preProcessauthorIDs(getAllFemaleAuthors())
print "Identified females:",len(allTheFemales)
allTheMales = preProcessauthorIDs(getAllMaleAuthors())
print "Identified males:",len(allTheMales)
# Female group: per-author counts, dumped under the 'F_ALL' tag.
issue_reports_for_females = getIssueReportsList(allTheFemales)
# The value returned by dumpContent is reported below as a byte count.
status=utility.dumpContent(issue_reports_for_females, 'F_ALL')
print "Dumped a file of {} bytes".format(status)
print "Total Female reports",sum(issue_reports_for_females)
# Male group: same steps, dumped under the 'M_ALL' tag.
issue_reports_for_males = getIssueReportsList(allTheMales)
status=utility.dumpContent(issue_reports_for_males, 'M_ALL')
print "Dumped a file of {} bytes".format(status)
print "Total Male reports",sum(issue_reports_for_males)
print "Ending at:", utility.giveTimeStamp()