예제 #1
0
def topData(period, startdate, enddate, identities_db, destdir, bots, npeople):
    """Collect the top-author rankings, dump them to JSON and return them.

    Three rankings are requested from ``SCM.top_people`` (first argument 0,
    365 and 31 respectively) and stored under the keys ``'authors.'``,
    ``'authors.last year'`` and ``'authors.last month'``.  The resulting
    dictionary is written to ``<destdir>/scm-top.json`` and returned.

    ``period``, ``identities_db`` and ``bots`` are accepted but not used here.
    """
    # (result key, first argument of SCM.top_people)
    # presumably the number is a look-back window in days, 0 meaning the
    # whole [startdate, enddate] range -- confirm in SCM.top_people.
    rankings = (
        ('authors.', 0),
        ('authors.last year', 365),
        ('authors.last month', 31),
    )

    top_authors = {}
    for key, days in rankings:
        top_authors[key] = SCM.top_people(days, startdate, enddate, "author", "", npeople)
    createJSON(top_authors, destdir + "/scm-top.json")

    # Top files: the result is not used by this function; the call itself is
    # kept exactly as before.
    SCM.top_files_modified()

    return top_authors
예제 #2
0
def companies_countriesData(period, startdate, enddate, identities_db, destdir):
    """Write one evolutionary JSON file per (company, country) pair.

    For every company returned by ``SCM.companies_name`` and every country
    returned by ``SCM.scm_countries_names`` the evolution data is fetched,
    passed through ``completePeriodIds`` and written to
    ``<destdir>/<company>_<country>-scm-evolutionary.json``.

    ``period`` is accepted but not used here.
    """
    companies = SCM.companies_name(startdate, enddate)['name']
    # The country list depends only on identities_db/startdate/enddate, not
    # on the company, so it is queried once instead of once per company.
    countries = SCM.scm_countries_names(identities_db, startdate, enddate)['name']

    for company in companies:
        for country in countries:
            print (country, "=>", company)
            # NOTE(review): `nperiod` is not a parameter of this function; it
            # is resolved as a module-level name -- confirm the calling script
            # defines it before this function runs.
            data = SCM.scm_companies_countries_evol(identities_db, company, country, nperiod, startdate, enddate)
            data = completePeriodIds(data)
            createJSON(data, destdir + "/" + company + "_" + country + "-scm-evolutionary.json", False)
예제 #3
0
def countriesData(period, startdate, enddate, identities_db, destdir):
    """Write the country index plus per-country evolutionary/static JSON.

    The list of country names is written to ``<destdir>/scm-countries.json``.
    For each country, the evolutionary data (after ``completePeriodIds``)
    goes to ``<destdir>/<country>-scm-cou-evolutionary.json`` and the static
    data to ``<destdir>/<country>-scm-cou-static.json``.
    """
    names = SCM.scm_countries_names(identities_db, startdate, enddate)['name']
    createJSON(names, destdir + "/scm-countries.json")

    for name in names:
        print (name)
        # The filter value is passed to SCM wrapped in single quotes.
        quoted = "'" + name + "'"
        prefix = destdir + "/" + name

        series = completePeriodIds(
            SCM.GetSCMEvolutionaryData(period, startdate, enddate, identities_db, ["country", quoted]))
        createJSON(series, prefix + "-scm-cou-evolutionary.json")

        totals = SCM.GetSCMStaticData(period, startdate, enddate, identities_db, ["country", quoted])
        createJSON(totals, prefix + "-scm-cou-static.json")
예제 #4
0
    def importToBazaar(self, foreign_tree, bazaar_branch):
        """Import the revisions of `foreign_tree` into `bazaar_branch`.

        :param foreign_tree: A `CVSWorkingTree`; its `local_path` is the
            directory that is imported from.
        :param bazaar_branch: A `bzrlib.branch.Branch`, which must have a
            colocated working tree.
        """
        source_path = foreign_tree.local_path
        working_tree = bazaar_branch.bzrdir.open_workingtree()
        target_path = str(working_tree.basedir)

        branch = SCM.branch(target_path)

        # A branch without any identifiable CSCVS revision has to be
        # "initialized" by CSCVS first.
        if cscvs.findLastCscvsCommit(branch) is None:
            self._runToBaz(source_path, "-SI", "MAIN.1", target_path)

        # Synchronise: import every revision that follows the last imported
        # one.  The last commit is looked up again here because the
        # initialization step above may just have created it, in which case
        # this imports *all* revisions.
        newest = cscvs.findLastCscvsCommit(branch)
        self._runToBaz(source_path, "-SC", "%s::" % newest, target_path)
예제 #5
0
    def importToBazaar(self, foreign_tree, bazaar_branch):
        """Bring `bazaar_branch` up to date with `foreign_tree`.

        :param foreign_tree: A `SubversionWorkingTree` or a `CVSWorkingTree`;
            its `local_path` is the directory that is imported from.
        :param bazaar_branch: A `bzrlib.branch.Branch`, which must have a
            colocated working tree.
        """
        from_dir = foreign_tree.local_path
        to_dir = str(bazaar_branch.bzrdir.open_workingtree().basedir)
        cscvs_branch = SCM.branch(to_dir)

        # Step 1: if no CSCVS revision can be identified in the branch yet,
        # let CSCVS "initialize" it.
        needs_init = cscvs.findLastCscvsCommit(cscvs_branch) is None
        if needs_init:
            self._runToBaz(from_dir, "-SI", "MAIN.1", to_dir)

        # Step 2: synchronise, i.e. import all revisions newer than the last
        # imported commit.  Right after an initialization this covers *all*
        # revisions, which is why the last commit is queried a second time.
        revision_range = "%s::" % cscvs.findLastCscvsCommit(cscvs_branch)
        self._runToBaz(from_dir, "-SC", revision_range, to_dir)
예제 #6
0
def peopleData(period, startdate, enddate, identities_db, destdir, top_authors_data):
    """Write the list of top people plus per-person evolutionary/static JSON.

    The ids found under ``"id"`` in the ``'authors.'``,
    ``'authors.last year'`` and ``'authors.last month'`` entries of
    ``top_authors_data`` are merged without duplicates and written to
    ``<destdir>/scm-people.json``.  For each id, the evolutionary data (after
    ``completePeriodIds``) goes to
    ``<destdir>/people-<id>-scm-evolutionary.json`` and the static data to
    ``<destdir>/people-<id>-scm-static.json``.

    ``top_authors_data`` is only read; it is never modified.
    ``identities_db`` is accepted but not used here.
    """
    # Gather the ids into a fresh set.  The previous implementation used
    # ``top += ...`` on the 'authors.' id list, which extended the caller's
    # list in place and silently grew ``top_authors_data``.
    unique_ids = set()
    for ranking in ('authors.', 'authors.last year', 'authors.last month'):
        unique_ids.update(top_authors_data[ranking]["id"])
    people = list(unique_ids)
    # the order is not the same as in the R-generated json
    createJSON(people, destdir + "/scm-people.json", False)

    for upeople_id in people:
        evol_data = SCM.GetEvolPeopleSCM(upeople_id, period, startdate, enddate)
        evol_data = completePeriodIds(evol_data)
        createJSON(evol_data, destdir + "/people-" + str(upeople_id) + "-scm-evolutionary.json")

        agg = SCM.GetStaticPeopleSCM(upeople_id, startdate, enddate)
        createJSON(agg, destdir + "/people-" + str(upeople_id) + "-scm-static.json")
예제 #7
0
def tsData(period, startdate, enddate, identities_db, destdir, granularity, conf):
    """Build the global evolutionary time series and write it to JSON.

    The base series comes from ``SCM.GetSCMEvolutionaryData`` (no filter).
    For every optional report present in the module-level ``reports``
    collection ('companies', 'countries', 'domains') the matching series is
    merged on top, with later keys overriding earlier ones.  Every series is
    passed through ``completePeriodIds`` before merging.  The result is
    written to ``<destdir>/scm-evolutionary.json``.

    ``granularity`` and ``conf`` are accepted but not used here.
    """
    data = SCM.GetSCMEvolutionaryData(period, startdate, enddate, identities_db, None)
    evol_data = completePeriodIds(data)

    # (report name, name of the SCM function providing its series).  The
    # function is looked up only when its report is enabled.
    optional_series = (
        ('companies', 'EvolCompanies'),
        ('countries', 'EvolCountries'),
        ('domains', 'EvolDomains'),
    )
    for report, getter in optional_series:
        if report in reports:
            data = getattr(SCM, getter)(period, startdate, enddate)
            # Copy-then-update instead of ``dict(a.items() + b.items())``:
            # the latter only works where items() returns lists (Python 2)
            # and raises TypeError on Python 3.
            merged = dict(evol_data)
            merged.update(completePeriodIds(data))
            evol_data = merged

    createJSON(evol_data, destdir + "/scm-evolutionary.json")
예제 #8
0
def domainsData(period, startdate, enddate, identities_db, destdir):
    """Write the domain index plus per-domain evolutionary/static JSON.

    The list of domain names is written to ``<destdir>/scm-domains.json``.
    For each domain, the evolutionary data (after ``completePeriodIds``) goes
    to ``<destdir>/<domain>-scm-dom-evolutionary.json`` and the static data
    to ``<destdir>/<domain>-scm-dom-static.json``.
    """
    names = SCM.scm_domains_names(identities_db, startdate, enddate)['name']
    createJSON(names, destdir + "/scm-domains.json")
    # Some R ts are wrong: for these domains createJSON gets an explicit
    # False as third argument.
    # NOTE(review): presumably that flag disables a comparison against the
    # R-generated file -- confirm in createJSON.
    known_bad_in_r = ['gerrit', 'gmx', 'emsenhuber', 'bitergia']

    for name in names:
        quoted = "'" + name + "'"
        print (quoted)
        prefix = destdir + "/" + name

        series = completePeriodIds(
            SCM.GetSCMEvolutionaryData(period, startdate, enddate, identities_db, ["domain", quoted]))
        if name not in known_bad_in_r:
            createJSON(series, prefix + "-scm-dom-evolutionary.json")
        else:
            createJSON(series, prefix + "-scm-dom-evolutionary.json", False)

        totals = SCM.GetSCMStaticData(period, startdate, enddate, identities_db, ["domain", quoted])
        createJSON(totals, prefix + "-scm-dom-static.json")
예제 #9
0
def reposData(period, startdate, enddate, identities_db, destdir, conf):
    """Write the repository index plus per-repository evolutionary/static JSON.

    The repository names are written to ``<destdir>/scm-repos.json``.  When
    ``SCM.repos_name`` yields a single value instead of a list, it is wrapped
    in a one-element list and createJSON is called with an explicit False as
    third argument.  For each repository, the evolutionary data (after
    ``completePeriodIds``) goes to
    ``<destdir>/<repo>-scm-rep-evolutionary.json`` and the static data to
    ``<destdir>/<repo>-scm-rep-static.json``.

    ``conf`` is accepted but not used here.
    """
    names = SCM.repos_name(startdate, enddate)['name']
    index_path = destdir + "/scm-repos.json"
    if isinstance(names, list):
        createJSON(names, index_path)
    else:
        # A lone repository comes back as a bare value, not as a list.
        names = [names]
        createJSON(names, index_path, False)

    for name in names:
        quoted = "'" + name + "'"
        print (quoted)
        prefix = destdir + "/" + name

        series = completePeriodIds(
            SCM.GetSCMEvolutionaryData(period, startdate, enddate, identities_db, ["repository", quoted]))
        createJSON(series, prefix + "-scm-rep-evolutionary.json")

        totals = SCM.GetSCMStaticData(period, startdate, enddate, identities_db, ["repository", quoted])
        createJSON(totals, prefix + "-scm-rep-static.json")
예제 #10
0
def companiesData(period, startdate, enddate, identities_db, destdir, bots, npeople):
    """Write the company index, per-company JSON files and a commits summary.

    The company names from ``SCM.companies_name_wo_affs`` are written to
    ``<destdir>/scm-companies.json``.  For each company the following files
    are produced under ``destdir``:

    * ``<company>-scm-com-evolutionary.json`` -- evolutionary data, after
      ``completePeriodIds``
    * ``<company>-scm-com-static.json`` -- static data
    * ``<company>-scm-com-top-authors.json`` -- top authors in the range
    * ``<company>-scm-top-authors_<year>.json`` -- top authors for each of
      the years 2006, 2009 and 2012

    Finally the result of ``SCM.GetCommitsSummaryCompanies`` is written to
    ``scm-companies-commits-summary.json``.
    """
    companies = SCM.companies_name_wo_affs(bots, startdate, enddate)
    companies = companies['name']
    createJSON(companies, destdir + "/scm-companies.json")

    for company in companies:
        # The filter value is passed to SCM wrapped in single quotes.
        company_name = "'" + company + "'"
        print (company_name)

        evol_data = SCM.GetSCMEvolutionaryData(period, startdate, enddate, identities_db, ["company", company_name])
        evol_data = completePeriodIds(evol_data)
        createJSON(evol_data, destdir + "/" + company + "-scm-com-evolutionary.json")

        agg = SCM.GetSCMStaticData(period, startdate, enddate, identities_db, ["company", company_name])
        createJSON(agg, destdir + "/" + company + "-scm-com-static.json")

        top_authors = SCM.company_top_authors(company_name, startdate, enddate, npeople)
        createJSON(top_authors, destdir + "/" + company + "-scm-com-top-authors.json", False)

        for year in [2006, 2009, 2012]:
            data = SCM.company_top_authors_year(company_name, year, npeople)
            createJSON(data, destdir + "/" + company + "-scm-top-authors_" + str(year) + ".json", False)

    # Use the identities_db argument like every other query in this function;
    # the previous code reached for the global ``opts.identities_db`` here,
    # which ignored the value the caller actually passed in.
    commits = SCM.GetCommitsSummaryCompanies(period, startdate, enddate, identities_db, 10)
    createJSON(commits, destdir + "/scm-companies-commits-summary.json")
예제 #11
0
    def _runToBaz(self, source_dir, flags, revisions, bazpath):
        """Invoke the CSCVS `totla` importer for the given revisions.

        :param source_dir: Directory of the foreign working tree that
            revisions are imported from.
        :param flags: Flags handed to `totla.totla`.
        :param revisions: Which revisions to import.
        :param bazpath: Directory of the Bazaar working tree that revisions
            are imported into.
        """
        # XXX: JonathanLange 2008-02-08: We need better documentation for
        # `flags` and `revisions`.
        cscvs_config = CVS.Config(source_dir)
        # `bazpath` appears twice: once as the value of "-b" and once as the
        # trailing positional argument.
        arguments = ["--strict", "-b", bazpath]
        arguments.extend([flags, revisions, bazpath])
        cscvs_config.args = arguments
        totla.totla(
            cscvs_config, self._logger, cscvs_config.args,
            SCM.tree(source_dir))
예제 #12
0
    def _runToBaz(self, source_dir, flags, revisions, bazpath):
        """Run the CSCVS utility (`totla.totla`) that imports revisions.

        :param source_dir: The foreign working tree's directory, i.e. the
            import source.
        :param flags: Flags forwarded to `totla.totla`.
        :param revisions: The revisions to import.
        :param bazpath: The Bazaar working tree's directory, i.e. the import
            target.
        """
        # XXX: JonathanLange 2008-02-08: We need better documentation for
        # `flags` and `revisions`.
        importer_config = CVS.Config(source_dir)
        # Fixed options first, then the per-call part; `bazpath` is passed
        # both as the "-b" value and as the last argument.
        fixed_options = ["--strict", "-b", bazpath]
        importer_config.args = fixed_options + [flags, revisions, bazpath]
        totla.totla(
            importer_config,
            self._logger,
            importer_config.args,
            SCM.tree(source_dir))
예제 #13
0
def aggData(period, startdate, enddate, identities_db, destdir):
    """Assemble the global static (aggregated) metrics and write them to JSON.

    Starting from ``SCM.GetSCMStaticData`` (no filter), the results of the
    following calls are merged in, later keys overriding earlier ones:
    ``SCM.StaticURL``; the ``evol_info_data_*`` call of every optional report
    present in the module-level ``reports`` collection ('companies',
    'countries', 'domains'); ``SCM.GetCodeCommunityStructure``; the
    commits/authors/files/lines "diff" metrics for 7, 30 and 365 days; and
    ``SCM.last_activity`` for several day counts.  The merged mapping is
    written to ``<destdir>/scm-static.json``.
    """
    data = SCM.GetSCMStaticData(period, startdate, enddate, identities_db, None)
    static_url = SCM.StaticURL()
    # Work on a private copy and merge with update().  The previous
    # ``dict(agg.items() + data.items())`` idiom only works where items()
    # returns lists (Python 2) and raises TypeError on Python 3.
    agg = dict(data)
    agg.update(static_url)

    if 'companies' in reports:
        agg.update(SCM.evol_info_data_companies(startdate, enddate))

    if 'countries' in reports:
        agg.update(SCM.evol_info_data_countries(startdate, enddate))

    if 'domains' in reports:
        agg.update(SCM.evol_info_data_domains(startdate, enddate))

    agg.update(SCM.GetCodeCommunityStructure(period, startdate, enddate, identities_db))

    # TODO: repeated data -- a separate 365-day GetDiffCommitsDays call used
    # to live here; the tendencies loop below already covers 365 days.

    # Tendencies
    for days in [7, 30, 365]:
        agg.update(SCM.GetDiffCommitsDays(period, enddate, identities_db, days))
        agg.update(SCM.GetDiffAuthorsDays(period, enddate, identities_db, days))
        agg.update(SCM.GetDiffFilesDays(period, enddate, identities_db, days))
        agg.update(SCM.GetDiffLinesDays(period, enddate, identities_db, days))

    # Last Activity: to be removed
    for days in [7, 14, 30, 60, 90, 180, 365, 730]:
        agg.update(SCM.last_activity(days))

    # Fields with wrong data in R
    skip_fields = ['percentage_removed_lines_30', 'percentage_added_lines_30',
                   'diff_netadded_lines_30', 'diff_netremoved_lines_30']
    createJSON(agg, destdir + "/scm-static.json", True, skip_fields)
예제 #14
0
    for i in range(len(lis)):
        for k in range(len(lis[i])):
            lis[i][k]=dic[lis[i][k]]
    return lis

if __name__ == "__main__":

    # Raw sample matrix: 5 samples in total, one per row; the trailing
    # comment on each row names the province used as that sample's label.
    data = np.array([
        [1772.14, 568.25, 298.66, 352.20, 307.21, 490.83, 364.28, 202.50],  # Liaoning
        [2752.25, 569.95, 662.31, 541.06, 623.05, 917.23, 599.98, 354.39],  # Zhejiang
        [1386.76, 460.99, 312.97, 280.78, 246.24, 407.26, 547.19, 188.52],  # Henan
        [1552.77, 517.16, 402.03, 272.44, 265.29, 563.10, 302.27, 251.41],  # Gansu
        [1711.03, 458.57, 334.91, 307.24, 297.72, 495.34, 274.48, 306.45],  # Qinghai
    ])

    # Row index -> label name of the sample.
    dic = {0: '辽宁', 1: '浙江', 2: '河南', 3: '甘肃', 4: '青海'}

    scm = SCM.SCM(classdist='nearest', sampledist='euc')

    # Cluster into 5, 4 and 3 classes and print each result.
    for kind, label in ((5, "分为5类结果 res = :"),
                        (4, "分为4类结果 res = :"),
                        (3, "分为3类结果 res = :")):
        res = scm.fit(data, kind=kind)    # sample indices after clustering
        res = replace(res, dic)           # swap sample indices for label names
        print(label, res)

    # The 2-class result is computed but not printed in this snippet.
    res = scm.fit(data, kind=2)           # sample indices after clustering
    res = replace(res, dic)               # swap sample indices for label names
예제 #15
0
if __name__ == "__main__":

    # Samples that clearly fall into 3 classes.
    # NOTE(review): the original comment said 12 samples, but 9 rows are
    # listed here -- confirm which one is intended.
    data = np.array([
        [1, 3, 5, 7, 9],
        [2, 4, 6, 8, 10],
        [1, 4, 5, 8, 9],
        [1100, 1300, 1500, 1700, 1900],
        [1200, 1400, 1600, 1800, 2000],
        [1100, 1400, 1500, 1800, 1900],
        [11000, 13000, 15000, 17000, 19000],
        [12000, 14000, 16000, 18000, 20000],
        [11000, 14000, 15000, 18000, 19000],
    ])

    # 1. Euclidean distance between samples

    print("欧氏距离测试: ")

    # One run per inter-class distance: nearest, farthest, average, centroid.
    for linkage, label in (
            ('nearest', "样本间欧氏距离+类间最近距离: res = "),
            ('farthest', "样本间欧氏距离+类间最远距离: res = "),
            ('average', "样本间欧氏距离+类间平均距离: res = "),
            ('centroid', "样本间欧氏距离+类间重心距离: res = ")):
        scm = SCM.SCM(classdist=linkage, sampledist='euc')
        res = scm.fit(data, kind=3)
        print(label, res)